Skip to content

Commit 9906b81

Browse files
authored
fix: [hotfix-2.5.19] Handle empty FieldsData in reduce/rerank for requery scenario (#44919) (#45016)
issue: #44909
pr: #44917

When requery optimization is enabled, search results contain IDs but empty FieldsData. During reduce/rerank operations, if the first shard has empty FieldsData while others have data, PrepareResultFieldData initializes an empty array, causing AppendFieldData to panic when accessing array indices.

Changes:
- Use the first non-empty FieldsData as the template in 5 functions: reduceAdvanceGroupBY, reduceSearchResultDataWithGroupBy, reduceSearchResultDataNoGroupBy, rankSearchResultDataByGroup, rankSearchResultDataByPk
- Add a length check before 4 AppendFieldData calls to prevent the panic
- Add unit tests for the empty and partially empty FieldsData scenarios

This fix handles both pure requery (all empty) and mixed scenarios (some empty, some with data) without breaking the normal search flow.

Signed-off-by: Wei Liu <[email protected]>
1 parent 53b2fbc commit 9906b81

File tree

2 files changed

+305
-11
lines changed

2 files changed

+305
-11
lines changed

internal/proxy/search_reduce_util.go

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,13 @@ func reduceAdvanceGroupBY(ctx context.Context, subSearchResultData []*schemapb.S
9999
} else {
100100
ret.GetResults().AllSearchCount = allSearchCount
101101
limit = int64(hitNum)
102-
ret.GetResults().FieldsData = typeutil.PrepareResultFieldData(subSearchResultData[0].GetFieldsData(), limit)
102+
// Find the first non-empty FieldsData as template
103+
for _, result := range subSearchResultData {
104+
if len(result.GetFieldsData()) > 0 {
105+
ret.GetResults().FieldsData = typeutil.PrepareResultFieldData(result.GetFieldsData(), limit)
106+
break
107+
}
108+
}
103109
}
104110

105111
if err := setupIdListForSearchResult(ret, pkType, limit); err != nil {
@@ -186,7 +192,7 @@ func reduceSearchResultDataWithGroupBy(ctx context.Context, subSearchResultData
186192
Results: &schemapb.SearchResultData{
187193
NumQueries: nq,
188194
TopK: topk,
189-
FieldsData: typeutil.PrepareResultFieldData(subSearchResultData[0].GetFieldsData(), limit),
195+
FieldsData: []*schemapb.FieldData{},
190196
Scores: []float32{},
191197
Ids: &schemapb.IDs{},
192198
Topks: []int64{},
@@ -204,6 +210,14 @@ func reduceSearchResultDataWithGroupBy(ctx context.Context, subSearchResultData
204210
ret.GetResults().AllSearchCount = allSearchCount
205211
}
206212

213+
// Find the first non-empty FieldsData as template
214+
for _, result := range subSearchResultData {
215+
if len(result.GetFieldsData()) > 0 {
216+
ret.GetResults().FieldsData = typeutil.PrepareResultFieldData(result.GetFieldsData(), limit)
217+
break
218+
}
219+
}
220+
207221
var (
208222
subSearchNum = len(subSearchResultData)
209223
// for results of each subSearchResultData, storing the start offset of each query of nq queries
@@ -280,7 +294,9 @@ func reduceSearchResultDataWithGroupBy(ctx context.Context, subSearchResultData
280294
groupEntities := groupByValMap[groupVal]
281295
for _, groupEntity := range groupEntities {
282296
subResData := subSearchResultData[groupEntity.subSearchIdx]
283-
retSize += typeutil.AppendFieldData(ret.Results.FieldsData, subResData.FieldsData, groupEntity.resultIdx)
297+
if len(ret.Results.FieldsData) > 0 {
298+
retSize += typeutil.AppendFieldData(ret.Results.FieldsData, subResData.FieldsData, groupEntity.resultIdx)
299+
}
284300
typeutil.AppendPKs(ret.Results.Ids, groupEntity.id)
285301
ret.Results.Scores = append(ret.Results.Scores, groupEntity.score)
286302
if err := typeutil.AppendGroupByValue(ret.Results, groupVal, subResData.GetGroupByFieldValue().GetType()); err != nil {
@@ -330,7 +346,7 @@ func reduceSearchResultDataNoGroupBy(ctx context.Context, subSearchResultData []
330346
Results: &schemapb.SearchResultData{
331347
NumQueries: nq,
332348
TopK: topk,
333-
FieldsData: typeutil.PrepareResultFieldData(subSearchResultData[0].GetFieldsData(), limit),
349+
FieldsData: []*schemapb.FieldData{},
334350
Scores: []float32{},
335351
Ids: &schemapb.IDs{},
336352
Topks: []int64{},
@@ -348,6 +364,14 @@ func reduceSearchResultDataNoGroupBy(ctx context.Context, subSearchResultData []
348364
ret.GetResults().AllSearchCount = allSearchCount
349365
}
350366

367+
// Find the first non-empty FieldsData as template
368+
for _, result := range subSearchResultData {
369+
if len(result.GetFieldsData()) > 0 {
370+
ret.GetResults().FieldsData = typeutil.PrepareResultFieldData(result.GetFieldsData(), limit)
371+
break
372+
}
373+
}
374+
351375
subSearchNum := len(subSearchResultData)
352376
if subSearchNum == 1 && offset == 0 {
353377
// sorting is not needed if there is only one shard and no offset, assigning the result directly.
@@ -401,7 +425,9 @@ func reduceSearchResultDataNoGroupBy(ctx context.Context, subSearchResultData []
401425
}
402426
score := subSearchResultData[subSearchIdx].Scores[resultDataIdx]
403427

404-
retSize += typeutil.AppendFieldData(ret.Results.FieldsData, subSearchResultData[subSearchIdx].FieldsData, resultDataIdx)
428+
if len(ret.Results.FieldsData) > 0 {
429+
retSize += typeutil.AppendFieldData(ret.Results.FieldsData, subSearchResultData[subSearchIdx].FieldsData, resultDataIdx)
430+
}
405431
typeutil.CopyPk(ret.Results.Ids, subSearchResultData[subSearchIdx].GetIds(), int(resultDataIdx))
406432
ret.Results.Scores = append(ret.Results.Scores, score)
407433
cursors[subSearchIdx]++
@@ -515,8 +541,13 @@ func rankSearchResultDataByGroup(ctx context.Context,
515541
return ret, nil
516542
}
517543

518-
// init FieldsData
519-
ret.Results.FieldsData = typeutil.PrepareResultFieldData(searchResults[0].GetResults().GetFieldsData(), limit)
544+
// Find the first non-empty FieldsData as template
545+
for _, result := range searchResults {
546+
if len(result.GetResults().GetFieldsData()) > 0 {
547+
ret.Results.FieldsData = typeutil.PrepareResultFieldData(result.GetResults().GetFieldsData(), limit)
548+
break
549+
}
550+
}
520551

521552
totalCount := limit * groupSize
522553
if err := setupIdListForSearchResult(ret, pkType, totalCount); err != nil {
@@ -643,7 +674,9 @@ func rankSearchResultDataByGroup(ctx context.Context,
643674
}
644675
ret.Results.Scores = append(ret.Results.Scores, score)
645676
loc := pk2DataOffset[i][group.idList[idx]]
646-
typeutil.AppendFieldData(ret.Results.FieldsData, searchResults[loc.resultIdx].GetResults().GetFieldsData(), int64(loc.offset))
677+
if len(ret.Results.FieldsData) > 0 {
678+
typeutil.AppendFieldData(ret.Results.FieldsData, searchResults[loc.resultIdx].GetResults().GetFieldsData(), int64(loc.offset))
679+
}
647680
typeutil.AppendGroupByValue(ret.Results, group.groupVal, groupByDataType)
648681
}
649682
returnedRowNum += len(group.idList)
@@ -712,8 +745,13 @@ func rankSearchResultDataByPk(ctx context.Context,
712745
return ret, nil
713746
}
714747

715-
// init FieldsData
716-
ret.Results.FieldsData = typeutil.PrepareResultFieldData(searchResults[0].GetResults().GetFieldsData(), limit)
748+
// Find the first non-empty FieldsData as template
749+
for _, result := range searchResults {
750+
if len(result.GetResults().GetFieldsData()) > 0 {
751+
ret.Results.FieldsData = typeutil.PrepareResultFieldData(result.GetResults().GetFieldsData(), limit)
752+
break
753+
}
754+
}
717755

718756
if err := setupIdListForSearchResult(ret, pkType, limit); err != nil {
719757
return ret, nil
@@ -783,7 +821,9 @@ func rankSearchResultDataByPk(ctx context.Context,
783821
}
784822
ret.Results.Scores = append(ret.Results.Scores, score)
785823
loc := pk2DataOffset[i][keys[index]]
786-
typeutil.AppendFieldData(ret.Results.FieldsData, searchResults[loc.resultIdx].GetResults().GetFieldsData(), loc.offset)
824+
if len(ret.Results.FieldsData) > 0 {
825+
typeutil.AppendFieldData(ret.Results.FieldsData, searchResults[loc.resultIdx].GetResults().GetFieldsData(), loc.offset)
826+
}
787827
}
788828
}
789829

internal/proxy/search_reduce_util_test.go

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,260 @@ func (struts *SearchReduceUtilTestSuite) TestReduceSearchResult() {
142142
}
143143
}
144144

145+
// TestReduceWithEmptyFieldsData tests reduce functions when FieldsData is empty (requery scenario)
146+
func (struts *SearchReduceUtilTestSuite) TestReduceWithEmptyFieldsData() {
147+
ctx := context.Background()
148+
nq := int64(1)
149+
topK := int64(5)
150+
offset := int64(0)
151+
152+
// Create search results with empty FieldsData (simulating requery scenario)
153+
searchResultData1 := &schemapb.SearchResultData{
154+
Ids: &schemapb.IDs{
155+
IdField: &schemapb.IDs_IntId{
156+
IntId: &schemapb.LongArray{
157+
Data: []int64{1, 2, 3, 4, 5},
158+
},
159+
},
160+
},
161+
Scores: []float32{0.9, 0.8, 0.7, 0.6, 0.5},
162+
Topks: []int64{5},
163+
NumQueries: nq,
164+
TopK: topK,
165+
FieldsData: []*schemapb.FieldData{}, // Empty FieldsData for requery
166+
}
167+
168+
searchResultData2 := &schemapb.SearchResultData{
169+
Ids: &schemapb.IDs{
170+
IdField: &schemapb.IDs_IntId{
171+
IntId: &schemapb.LongArray{
172+
Data: []int64{6, 7, 8, 9, 10},
173+
},
174+
},
175+
},
176+
Scores: []float32{0.85, 0.75, 0.65, 0.55, 0.45},
177+
Topks: []int64{5},
178+
NumQueries: nq,
179+
TopK: topK,
180+
FieldsData: []*schemapb.FieldData{}, // Empty FieldsData for requery
181+
}
182+
183+
// Test reduceSearchResultDataNoGroupBy with empty FieldsData
184+
{
185+
results, err := reduceSearchResultDataNoGroupBy(ctx, []*schemapb.SearchResultData{searchResultData1, searchResultData2}, nq, topK, "L2", schemapb.DataType_Int64, offset)
186+
struts.NoError(err)
187+
struts.NotNil(results)
188+
// Should have merged results without panic
189+
struts.Equal(int64(5), results.Results.Topks[0])
190+
// FieldsData should be empty since all inputs were empty
191+
struts.Equal(0, len(results.Results.FieldsData))
192+
}
193+
194+
// Test reduceSearchResultDataWithGroupBy with empty FieldsData
195+
{
196+
// Add GroupByFieldValue to support group by
197+
searchResultData1.GroupByFieldValue = &schemapb.FieldData{
198+
Type: schemapb.DataType_VarChar,
199+
FieldName: "group",
200+
FieldId: 101,
201+
Field: &schemapb.FieldData_Scalars{
202+
Scalars: &schemapb.ScalarField{
203+
Data: &schemapb.ScalarField_StringData{
204+
StringData: &schemapb.StringArray{
205+
Data: []string{"a", "b", "c", "a", "b"},
206+
},
207+
},
208+
},
209+
},
210+
}
211+
searchResultData2.GroupByFieldValue = &schemapb.FieldData{
212+
Type: schemapb.DataType_VarChar,
213+
FieldName: "group",
214+
FieldId: 101,
215+
Field: &schemapb.FieldData_Scalars{
216+
Scalars: &schemapb.ScalarField{
217+
Data: &schemapb.ScalarField_StringData{
218+
StringData: &schemapb.StringArray{
219+
Data: []string{"c", "a", "b", "c", "a"},
220+
},
221+
},
222+
},
223+
},
224+
}
225+
226+
results, err := reduceSearchResultDataWithGroupBy(ctx, []*schemapb.SearchResultData{searchResultData1, searchResultData2}, nq, topK, "L2", schemapb.DataType_Int64, offset, int64(2))
227+
struts.NoError(err)
228+
struts.NotNil(results)
229+
// FieldsData should be empty since all inputs were empty
230+
struts.Equal(0, len(results.Results.FieldsData))
231+
}
232+
233+
// Test reduceAdvanceGroupBY with empty FieldsData
234+
{
235+
results, err := reduceAdvanceGroupBY(ctx, []*schemapb.SearchResultData{searchResultData1, searchResultData2}, nq, topK, schemapb.DataType_Int64, "L2")
236+
struts.NoError(err)
237+
struts.NotNil(results)
238+
// FieldsData should be empty since all inputs were empty
239+
struts.Equal(0, len(results.Results.FieldsData))
240+
}
241+
}
242+
243+
// TestReduceWithPartialEmptyFieldsData tests when first result has empty FieldsData but second has data
244+
func (struts *SearchReduceUtilTestSuite) TestReduceWithPartialEmptyFieldsData() {
245+
ctx := context.Background()
246+
nq := int64(1)
247+
topK := int64(3)
248+
offset := int64(0)
249+
250+
// First result with empty FieldsData
251+
searchResultData1 := &schemapb.SearchResultData{
252+
Ids: &schemapb.IDs{
253+
IdField: &schemapb.IDs_IntId{
254+
IntId: &schemapb.LongArray{
255+
Data: []int64{1, 2, 3},
256+
},
257+
},
258+
},
259+
Scores: []float32{0.9, 0.8, 0.7},
260+
Topks: []int64{3},
261+
NumQueries: nq,
262+
TopK: topK,
263+
FieldsData: []*schemapb.FieldData{}, // Empty
264+
}
265+
266+
// Second result with non-empty FieldsData
267+
searchResultData2 := &schemapb.SearchResultData{
268+
Ids: &schemapb.IDs{
269+
IdField: &schemapb.IDs_IntId{
270+
IntId: &schemapb.LongArray{
271+
Data: []int64{4, 5, 6},
272+
},
273+
},
274+
},
275+
Scores: []float32{0.85, 0.75, 0.65},
276+
Topks: []int64{3},
277+
NumQueries: nq,
278+
TopK: topK,
279+
FieldsData: []*schemapb.FieldData{
280+
{
281+
Type: schemapb.DataType_Int64,
282+
FieldName: "field1",
283+
FieldId: 100,
284+
Field: &schemapb.FieldData_Scalars{
285+
Scalars: &schemapb.ScalarField{
286+
Data: &schemapb.ScalarField_LongData{
287+
LongData: &schemapb.LongArray{
288+
Data: []int64{40, 50, 60},
289+
},
290+
},
291+
},
292+
},
293+
},
294+
},
295+
}
296+
297+
// Test: Should use the non-empty FieldsData from second result
298+
results, err := reduceSearchResultDataNoGroupBy(ctx, []*schemapb.SearchResultData{searchResultData1, searchResultData2}, nq, topK, "L2", schemapb.DataType_Int64, offset)
299+
struts.NoError(err)
300+
struts.NotNil(results)
301+
// Should have initialized FieldsData from second result
302+
struts.Greater(len(results.Results.FieldsData), 0)
303+
}
304+
305+
// TestRankWithEmptyFieldsData tests rank functions when FieldsData is empty
306+
func (struts *SearchReduceUtilTestSuite) TestRankWithEmptyFieldsData() {
307+
ctx := context.Background()
308+
nq := int64(1)
309+
limit := int64(3)
310+
offset := int64(0)
311+
roundDecimal := int64(-1)
312+
rankParams := &rankParams{limit: limit, offset: offset, roundDecimal: roundDecimal}
313+
314+
// Create search results with empty FieldsData
315+
searchResult1 := &milvuspb.SearchResults{
316+
Results: &schemapb.SearchResultData{
317+
Ids: &schemapb.IDs{
318+
IdField: &schemapb.IDs_IntId{
319+
IntId: &schemapb.LongArray{
320+
Data: []int64{1, 2, 3},
321+
},
322+
},
323+
},
324+
Scores: []float32{0.9, 0.8, 0.7},
325+
Topks: []int64{3},
326+
NumQueries: nq,
327+
FieldsData: []*schemapb.FieldData{}, // Empty
328+
},
329+
}
330+
331+
searchResult2 := &milvuspb.SearchResults{
332+
Results: &schemapb.SearchResultData{
333+
Ids: &schemapb.IDs{
334+
IdField: &schemapb.IDs_IntId{
335+
IntId: &schemapb.LongArray{
336+
Data: []int64{2, 4, 5},
337+
},
338+
},
339+
},
340+
Scores: []float32{0.85, 0.75, 0.65},
341+
Topks: []int64{3},
342+
NumQueries: nq,
343+
FieldsData: []*schemapb.FieldData{}, // Empty
344+
},
345+
}
346+
347+
searchResults := []*milvuspb.SearchResults{searchResult1, searchResult2}
348+
349+
// Test rankSearchResultDataByPk with empty FieldsData
350+
{
351+
results, err := rankSearchResultDataByPk(ctx, nq, rankParams, schemapb.DataType_Int64, searchResults)
352+
struts.NoError(err)
353+
struts.NotNil(results)
354+
// FieldsData should be empty since all inputs were empty
355+
struts.Equal(0, len(results.Results.FieldsData))
356+
}
357+
358+
// Test rankSearchResultDataByGroup with empty FieldsData
359+
{
360+
// Add group by values
361+
searchResult1.Results.GroupByFieldValue = &schemapb.FieldData{
362+
Type: schemapb.DataType_VarChar,
363+
FieldName: "group",
364+
FieldId: 101,
365+
Field: &schemapb.FieldData_Scalars{
366+
Scalars: &schemapb.ScalarField{
367+
Data: &schemapb.ScalarField_StringData{
368+
StringData: &schemapb.StringArray{
369+
Data: []string{"a", "b", "c"},
370+
},
371+
},
372+
},
373+
},
374+
}
375+
searchResult2.Results.GroupByFieldValue = &schemapb.FieldData{
376+
Type: schemapb.DataType_VarChar,
377+
FieldName: "group",
378+
FieldId: 101,
379+
Field: &schemapb.FieldData_Scalars{
380+
Scalars: &schemapb.ScalarField{
381+
Data: &schemapb.ScalarField_StringData{
382+
StringData: &schemapb.StringArray{
383+
Data: []string{"b", "a", "c"},
384+
},
385+
},
386+
},
387+
},
388+
}
389+
390+
groupScorer, _ := GetGroupScorer("max")
391+
results, err := rankSearchResultDataByGroup(ctx, nq, rankParams, schemapb.DataType_Int64, searchResults, groupScorer, int64(2))
392+
struts.NoError(err)
393+
struts.NotNil(results)
394+
// FieldsData should be empty since all inputs were empty
395+
struts.Equal(0, len(results.Results.FieldsData))
396+
}
397+
}
398+
145399
func TestSearchReduceUtilTestSuite(t *testing.T) {
146400
suite.Run(t, new(SearchReduceUtilTestSuite))
147401
}

0 commit comments

Comments
 (0)