Skip to content

Commit 93f7747

Browse files
authored
feat: mix search weight (#4170)
* feat: mix search weight * feat: svg render
1 parent 6aa5e2c commit 93f7747

File tree

29 files changed

+179
-31
lines changed

29 files changed

+179
-31
lines changed

docSite/content/zh-cn/docs/development/upgrading/491.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ weight: 799
1313
2. 知识库分块阅读器。
1414
3. API 知识库支持 PDF 增强解析。
1515
4. 邀请团队成员,改为邀请链接模式。
16-
5. 支持重排模型选择和权重设置,同时调整了知识库搜索权重计算方式,改成 搜索权重 + 重排权重,而不是向量检索权重+全文检索权重+重排权重。
16+
5. 支持混合检索权重设置。
17+
6. 支持重排模型选择和权重设置,同时调整了知识库搜索权重计算方式,改成 搜索权重 + 重排权重,而不是向量检索权重+全文检索权重+重排权重。
1718

1819
## ⚙️ 优化
1920

packages/global/core/app/type.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ export type AppDatasetSearchParamsType = {
7575
searchMode: `${DatasetSearchModeEnum}`;
7676
limit?: number; // limit max tokens
7777
similarity?: number;
78+
embeddingWeight?: number; // embedding weight, fullText weight = 1 - embeddingWeight
7879

7980
usingReRank?: boolean;
8081
rerankModel?: string;

packages/global/core/app/utils.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ export const appWorkflow2Form = ({
108108
defaultAppForm.dataset.searchMode =
109109
findInputValueByKey(node.inputs, NodeInputKeyEnum.datasetSearchMode) ||
110110
DatasetSearchModeEnum.embedding;
111+
defaultAppForm.dataset.embeddingWeight = findInputValueByKey(
112+
node.inputs,
113+
NodeInputKeyEnum.datasetSearchEmbeddingWeight
114+
);
111115
// Rerank
112116
defaultAppForm.dataset.usingReRank = !!findInputValueByKey(
113117
node.inputs,

packages/global/core/dataset/constants.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ export enum SearchScoreTypeEnum {
185185
}
186186
export const SearchScoreTypeMap = {
187187
[SearchScoreTypeEnum.embedding]: {
188-
label: i18nT('common:core.dataset.search.score.embedding'),
188+
label: i18nT('common:core.dataset.search.mode.embedding'),
189189
desc: i18nT('common:core.dataset.search.score.embedding desc'),
190190
showScore: true
191191
},

packages/global/core/workflow/constants.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,12 @@ export enum NodeInputKeyEnum {
154154
datasetSimilarity = 'similarity',
155155
datasetMaxTokens = 'limit',
156156
datasetSearchMode = 'searchMode',
157+
datasetSearchEmbeddingWeight = 'embeddingWeight',
158+
157159
datasetSearchUsingReRank = 'usingReRank',
158160
datasetSearchRerankWeight = 'rerankWeight',
159161
datasetSearchRerankModel = 'rerankModel',
162+
160163
datasetSearchUsingExtensionQuery = 'datasetSearchUsingExtensionQuery',
161164
datasetSearchExtensionModel = 'datasetSearchExtensionModel',
162165
datasetSearchExtensionBg = 'datasetSearchExtensionBg',

packages/global/core/workflow/runtime/type.d.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ export type DispatchNodeResponseType = {
133133
similarity?: number;
134134
limit?: number;
135135
searchMode?: `${DatasetSearchModeEnum}`;
136+
embeddingWeight?: number;
137+
rerankModel?: string;
138+
rerankWeight?: number;
136139
searchUsingReRank?: boolean;
137140
queryExtensionResult?: {
138141
model: string;

packages/global/core/workflow/template/system/datasetSearch.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,13 @@ export const DatasetSearchModule: FlowNodeTemplateType = {
6464
valueType: WorkflowIOValueTypeEnum.string,
6565
value: DatasetSearchModeEnum.embedding
6666
},
67+
{
68+
key: NodeInputKeyEnum.datasetSearchEmbeddingWeight,
69+
renderTypeList: [FlowNodeInputTypeEnum.hidden],
70+
label: '',
71+
valueType: WorkflowIOValueTypeEnum.number,
72+
value: 0.5
73+
},
6774
// Rerank
6875
{
6976
key: NodeInputKeyEnum.datasetSearchUsingReRank,

packages/service/core/dataset/search/controller.ts

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export type SearchDatasetDataProps = {
4040
[NodeInputKeyEnum.datasetSimilarity]?: number; // min distance
4141
[NodeInputKeyEnum.datasetMaxTokens]: number; // max Token limit
4242
[NodeInputKeyEnum.datasetSearchMode]?: `${DatasetSearchModeEnum}`;
43+
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
4344

4445
[NodeInputKeyEnum.datasetSearchUsingReRank]?: boolean;
4546
[NodeInputKeyEnum.datasetSearchRerankModel]?: RerankModelItemType;
@@ -161,6 +162,7 @@ export async function searchDatasetData(
161162
similarity = 0,
162163
limit: maxTokens,
163164
searchMode = DatasetSearchModeEnum.embedding,
165+
embeddingWeight = 0.5,
164166
usingReRank = false,
165167
rerankModel,
166168
rerankWeight = 0.5,
@@ -731,16 +733,20 @@ export async function searchDatasetData(
731733
})();
732734

733735
// embedding recall and fullText recall rrf concat
736+
const baseK = 120;
737+
const embK = Math.round(baseK * (1 - embeddingWeight)); // k for the embedding recall list (higher embeddingWeight -> smaller k)
738+
const fullTextK = Math.round(baseK * embeddingWeight); // k for the full-text recall list (higher embeddingWeight -> larger k)
739+
734740
const rrfSearchResult = datasetSearchResultConcat([
735-
{ k: 60, list: embeddingRecallResults },
736-
{ k: 60, list: fullTextRecallResults }
741+
{ k: embK, list: embeddingRecallResults },
742+
{ k: fullTextK, list: fullTextRecallResults }
737743
]);
738744
const rrfConcatResults = (() => {
745+
if (reRankResults.length === 0) return rrfSearchResult;
739746
if (rerankWeight === 1) return reRankResults;
740747

741-
const baseK = 30;
742-
const searchK = Math.round(baseK / (1 - rerankWeight)); // 搜索结果的 k 值
743-
const rerankK = Math.round(baseK / rerankWeight); // rerank 结果的 k 值
748+
const searchK = Math.round(baseK * rerankWeight); // 搜索结果的 k 值
749+
const rerankK = Math.round(baseK * (1 - rerankWeight)); // rerank 结果的 k 值
744750

745751
return datasetSearchResultConcat([
746752
{ k: searchK, list: rrfSearchResult },

packages/service/core/workflow/dispatch/dataset/search.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ type DatasetSearchProps = ModuleDispatchProps<{
2222
[NodeInputKeyEnum.datasetSelectList]: SelectedDatasetType;
2323
[NodeInputKeyEnum.datasetSimilarity]: number;
2424
[NodeInputKeyEnum.datasetMaxTokens]: number;
25-
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
2625
[NodeInputKeyEnum.userChatInput]?: string;
26+
[NodeInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
27+
[NodeInputKeyEnum.datasetSearchEmbeddingWeight]?: number;
2728

2829
[NodeInputKeyEnum.datasetSearchUsingReRank]: boolean;
2930
[NodeInputKeyEnum.datasetSearchRerankModel]?: string;
@@ -57,11 +58,11 @@ export async function dispatchDatasetSearch(
5758
datasets = [],
5859
similarity,
5960
limit = 1500,
60-
searchMode,
6161
userChatInput = '',
6262
authTmbId = false,
6363
collectionFilterMatch,
64-
64+
searchMode,
65+
embeddingWeight,
6566
usingReRank,
6667
rerankModel,
6768
rerankWeight,
@@ -129,6 +130,7 @@ export async function dispatchDatasetSearch(
129130
limit,
130131
datasetIds,
131132
searchMode,
133+
embeddingWeight,
132134
usingReRank: usingReRank && (await checkTeamReRankPermission(teamId)),
133135
rerankModel: getRerankModel(rerankModel),
134136
rerankWeight,
@@ -228,6 +230,9 @@ export async function dispatchDatasetSearch(
228230
similarity: usingSimilarityFilter ? similarity : undefined,
229231
limit,
230232
searchMode,
233+
embeddingWeight: searchMode === DatasetSearchModeEnum.mixedRecall ? embeddingWeight : undefined,
234+
rerankModel: usingReRank ? getRerankModel(rerankModel)?.name : undefined,
235+
rerankWeight: usingReRank ? rerankWeight : undefined,
231236
searchUsingReRank: searchUsingReRank,
232237
quoteList: searchRes,
233238
queryExtensionResult,

packages/web/components/common/Icon/constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@ export const iconPaths = {
429429
'price/bg': () => import('./icons/price/bg.svg'),
430430
'price/right': () => import('./icons/price/right.svg'),
431431
save: () => import('./icons/save.svg'),
432+
sliderTag: () => import('./icons/sliderTag.svg'),
432433
stop: () => import('./icons/stop.svg'),
433434
'support/account/laf': () => import('./icons/support/account/laf.svg'),
434435
'support/account/loginoutLight': () => import('./icons/support/account/loginoutLight.svg'),

0 commit comments

Comments
 (0)