@@ -40,6 +40,7 @@ export type SearchDatasetDataProps = {
4040 [ NodeInputKeyEnum . datasetSimilarity ] ?: number ; // min distance
4141 [ NodeInputKeyEnum . datasetMaxTokens ] : number ; // max Token limit
4242 [ NodeInputKeyEnum . datasetSearchMode ] ?: `${DatasetSearchModeEnum } `;
43+ [ NodeInputKeyEnum . datasetSearchEmbeddingWeight ] ?: number ;
4344
4445 [ NodeInputKeyEnum . datasetSearchUsingReRank ] ?: boolean ;
4546 [ NodeInputKeyEnum . datasetSearchRerankModel ] ?: RerankModelItemType ;
@@ -161,6 +162,7 @@ export async function searchDatasetData(
161162 similarity = 0 ,
162163 limit : maxTokens ,
163164 searchMode = DatasetSearchModeEnum . embedding ,
165+ embeddingWeight = 0.5 ,
164166 usingReRank = false ,
165167 rerankModel,
166168 rerankWeight = 0.5 ,
@@ -731,16 +733,20 @@ export async function searchDatasetData(
731733 } ) ( ) ;
732734
733735 // Merge the embedding recall and full-text recall results with RRF
736+ const baseK = 120 ;
737+ const embK = Math . round ( baseK * ( 1 - embeddingWeight ) ) ; // k value for the embedding recall results
738+ const fullTextK = Math . round ( baseK * embeddingWeight ) ; // k value for the full-text recall results
739+
734740 const rrfSearchResult = datasetSearchResultConcat ( [
735- { k : 60 , list : embeddingRecallResults } ,
736- { k : 60 , list : fullTextRecallResults }
741+ { k : embK , list : embeddingRecallResults } ,
742+ { k : fullTextK , list : fullTextRecallResults }
737743 ] ) ;
738744 const rrfConcatResults = ( ( ) => {
745+ if ( reRankResults . length === 0 ) return rrfSearchResult ;
739746 if ( rerankWeight === 1 ) return reRankResults ;
740747
741- const baseK = 30 ;
742- const searchK = Math . round ( baseK / ( 1 - rerankWeight ) ) ; // 搜索结果的 k 值
743- const rerankK = Math . round ( baseK / rerankWeight ) ; // rerank 结果的 k 值
748+ const searchK = Math . round ( baseK * rerankWeight ) ; // 搜索结果的 k 值
749+ const rerankK = Math . round ( baseK * ( 1 - rerankWeight ) ) ; // rerank 结果的 k 值
744750
745751 return datasetSearchResultConcat ( [
746752 { k : searchK , list : rrfSearchResult } ,
0 commit comments