
Commit caaebe4

Authored by memset0, binary-husky, and SakuraPuare
add support for Deepseek R1 model and display CoT (#2118)
* feat: add support for R1 model and display CoT
* fix unpacking
* feat: customized font & font size
* auto hide tooltip when scrolling down
* tooltip glass transparent css
* fix: Enhance API key validation in is_any_api_key function (#2113)
* support qwen2.5-max!
* update minor adjustment

Co-authored-by: binary-husky <[email protected]>
Co-authored-by: Steven Moder <[email protected]>
1 parent 0458590 commit caaebe4

File tree: 4 files changed, +133 −75 lines

4 files changed

+133
-75
lines changed

config.py

Lines changed: 5 additions & 4 deletions

@@ -13,6 +13,9 @@
 # [step 1-2]>> (Tongyi qwen-max) Connect the Tongyi Qianwen online model; get an api-key at https://dashscope.console.aliyun.com/
 DASHSCOPE_API_KEY = ""  # Alibaba DashScope API_KEY
 
+# [step 1-3]>> (deepseek-reasoner) DeepSeek API KEY; the default request endpoint is "https://api.deepseek.com/v1/chat/completions"
+DEEPSEEK_API_KEY = ""
+
 # [step 2]>> Set to True to route requests through a proxy. Leave unchanged when deploying directly on an overseas server; no change is needed either when using a local or region-unrestricted model.
 USE_PROXY = False
 if USE_PROXY:
@@ -39,7 +42,8 @@
     "gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-4-turbo-2024-04-09",
     "gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
     "gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
-    "gemini-1.5-pro", "chatglm3", "chatglm4"
+    "gemini-1.5-pro", "chatglm3", "chatglm4",
+    "deepseek-chat", "deepseek-coder", "deepseek-reasoner"
 ]
 
 EMBEDDING_MODEL = "text-embedding-3-small"
@@ -261,9 +265,6 @@
 # 01.AI (Yi Model) API KEY
 YIMODEL_API_KEY = ""
 
-# DeepSeek API KEY; the default request endpoint is "https://api.deepseek.com/v1/chat/completions"
-DEEPSEEK_API_KEY = ""
-
 
 # Zidong Taichu model https://ai-maas.wair.ac.cn
 TAICHU_API_KEY = ""
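Net effect of the config change: DEEPSEEK_API_KEY moves up into the quick-start steps, and the three DeepSeek models join AVAIL_LLM_MODELS. A minimal sanity-check sketch (the model names and default empty key come from the diff; the startup check itself is an illustration, not code from this commit):

import sys

# Values as they would appear in config.py after this commit.
AVAIL_LLM_MODELS = [
    "gemini-1.5-pro", "chatglm3", "chatglm4",
    "deepseek-chat", "deepseek-coder", "deepseek-reasoner",
]
DEEPSEEK_API_KEY = ""  # left empty by default, as in the diff

# Hypothetical early check: a DeepSeek model enabled without a key would
# otherwise only fail later, at request time.
if any(m.startswith("deepseek") for m in AVAIL_LLM_MODELS) and not DEEPSEEK_API_KEY:
    print("Warning: DeepSeek models enabled but DEEPSEEK_API_KEY is empty", file=sys.stderr)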

request_llms/bridge_all.py

Lines changed: 13 additions & 3 deletions

@@ -1090,18 +1090,18 @@ def decode(self, *args, **kwargs):
     except:
         logger.error(trimmed_format_exc())
 # -=-=-=-=-=-=- High-Flyer / DeepSeek online model API -=-=-=-=-=-=-
-if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS:
+if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS:
     try:
         deepseekapi_noui, deepseekapi_ui = get_predict_function(
             api_key_conf_name="DEEPSEEK_API_KEY", max_output_token=4096, disable_proxy=False
-        )
+            )
         model_info.update({
             "deepseek-chat":{
                 "fn_with_ui": deepseekapi_ui,
                 "fn_without_ui": deepseekapi_noui,
                 "endpoint": deepseekapi_endpoint,
                 "can_multi_thread": True,
-                "max_token": 32000,
+                "max_token": 64000,
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             },
@@ -1114,6 +1114,16 @@ def decode(self, *args, **kwargs):
                 "tokenizer": tokenizer_gpt35,
                 "token_cnt": get_token_num_gpt35,
             },
+            "deepseek-reasoner":{
+                "fn_with_ui": deepseekapi_ui,
+                "fn_without_ui": deepseekapi_noui,
+                "endpoint": deepseekapi_endpoint,
+                "can_multi_thread": True,
+                "max_token": 64000,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+                "enable_reasoning": True
+            },
         })
     except:
         logger.error(trimmed_format_exc())
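The new entry's "enable_reasoning": True flag is the single switch that the shared OpenAI-style template reads to decide whether to collect a chain of thought. A small sketch of that lookup (dictionary trimmed to the relevant keys; the helper name is mine, not the repo's):

# Trimmed-down model_info, keeping only the keys this sketch needs.
model_info = {
    "deepseek-chat":     {"max_token": 64000},
    "deepseek-reasoner": {"max_token": 64000, "enable_reasoning": True},
}

def supports_reasoning(llm_model: str) -> bool:
    # .get() with a False default means models registered before this
    # commit keep working without any migration.
    return model_info.get(llm_model, {}).get("enable_reasoning", False)

assert supports_reasoning("deepseek-reasoner")
assert not supports_reasoning("deepseek-chat")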

request_llms/oai_std_model_template.py

Lines changed: 58 additions & 50 deletions

@@ -36,10 +36,11 @@ def get_full_error(chunk, stream_response):
 
 def decode_chunk(chunk):
     """
-    Decode the "content" and "finish_reason" fields of a chunk
+    Decode the "content" and "finish_reason" fields of a chunk (for models with chain-of-thought support, the "reasoning_content" field is returned as well)
     """
     chunk = chunk.decode()
     respose = ""
+    reasoning_content = ""
     finish_reason = "False"
     try:
         chunk = json.loads(chunk[6:])
@@ -57,14 +58,20 @@ def decode_chunk(chunk):
         return respose, finish_reason
 
     try:
-        respose = chunk["choices"][0]["delta"]["content"]
+        if chunk["choices"][0]["delta"]["content"] is not None:
+            respose = chunk["choices"][0]["delta"]["content"]
+    except:
+        pass
+    try:
+        if chunk["choices"][0]["delta"]["reasoning_content"] is not None:
+            reasoning_content = chunk["choices"][0]["delta"]["reasoning_content"]
     except:
         pass
     try:
         finish_reason = chunk["choices"][0]["finish_reason"]
     except:
         pass
-    return respose, finish_reason
+    return respose, reasoning_content, finish_reason
 
 
 def generate_message(input, model, key, history, max_output_token, system_prompt, temperature):
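decode_chunk now returns a three-tuple, and both callers unpack accordingly (the "fix unpacking" item in the commit message). To make the new contract concrete, here is a self-contained sketch of the kind of SSE line it handles; the payload is invented but shaped like a deepseek-reasoner streaming delta:

import json

# Hypothetical SSE line: content is null while the model is still "thinking".
raw = b'data: {"choices": [{"delta": {"content": null, "reasoning_content": "First, restate the problem."}, "finish_reason": null}]}'

payload = json.loads(raw.decode()[6:])           # strip the "data: " prefix
delta = payload["choices"][0]["delta"]
content = delta.get("content") or ""             # None -> empty string
reasoning = delta.get("reasoning_content") or ""
finish_reason = payload["choices"][0].get("finish_reason") or "False"

print((content, reasoning, finish_reason))
# ('', 'First, restate the problem.', 'False')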
@@ -149,6 +156,7 @@ def predict_no_ui_long_connection(
     observe_window = None:
         Used to pass partial output across threads; most of the time this is only for fancy visual effects and can be left empty. observe_window[0]: observation window. observe_window[1]: watchdog.
     """
+    from .bridge_all import model_info
     watch_dog_patience = 5  # watchdog patience: no biting for the first 5 seconds (not that it bites people anyway)
     if len(APIKEY) == 0:
        raise RuntimeError(f"APIKEY is empty; please check {APIKEY} in the config file")
@@ -163,29 +171,21 @@
         system_prompt=sys_prompt,
         temperature=llm_kwargs["temperature"],
     )
+
+    reasoning = model_info[llm_kwargs['llm_model']].get('enable_reasoning', False)
+
     retry = 0
     while True:
         try:
-            from .bridge_all import model_info
-
             endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"]
-            if not disable_proxy:
-                response = requests.post(
-                    endpoint,
-                    headers=headers,
-                    proxies=proxies,
-                    json=playload,
-                    stream=True,
-                    timeout=TIMEOUT_SECONDS,
-                )
-            else:
-                response = requests.post(
-                    endpoint,
-                    headers=headers,
-                    json=playload,
-                    stream=True,
-                    timeout=TIMEOUT_SECONDS,
-                )
+            response = requests.post(
+                endpoint,
+                headers=headers,
+                proxies=None if disable_proxy else proxies,
+                json=playload,
+                stream=True,
+                timeout=TIMEOUT_SECONDS,
+            )
             break
         except:
             retry += 1
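The duplicated requests.post branches collapse into one call: in requests, passing proxies=None is equivalent to omitting the argument (environment proxy settings still apply either way), so the refactor preserves the old else-branch behavior. A standalone sketch of the consolidated call (the wrapper name and timeout value are mine):

import requests

TIMEOUT_SECONDS = 30  # assumed value for the sketch

def post_stream(endpoint, headers, payload, proxies, disable_proxy):
    # One call replaces the old if/else pair: when disable_proxy is True,
    # proxies=None falls back to requests' default proxy handling, exactly
    # what omitting the argument did before.
    return requests.post(
        endpoint,
        headers=headers,
        proxies=None if disable_proxy else proxies,
        json=payload,
        stream=True,
        timeout=TIMEOUT_SECONDS,
    )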
@@ -194,10 +194,13 @@
             raise TimeoutError
         if MAX_RETRY != 0:
             logger.error(f"Request timed out, retrying ({retry}/{MAX_RETRY}) ...")
-
-    stream_response = response.iter_lines()
+
     result = ""
     finish_reason = ""
+    if reasoning:
+        resoning_buffer = ""
+
+    stream_response = response.iter_lines()
     while True:
         try:
             chunk = next(stream_response)
@@ -207,9 +210,9 @@
             break
         except requests.exceptions.ConnectionError:
             chunk = next(stream_response)  # Failed; retry once? If that fails again, there is nothing more to be done.
-        response_text, finish_reason = decode_chunk(chunk)
+        response_text, reasoning_content, finish_reason = decode_chunk(chunk)
         # The first chunk of the returned stream is empty; keep waiting
-        if response_text == "" and finish_reason != "False":
+        if response_text == "" and (reasoning == False or reasoning_content == "") and finish_reason != "False":
             continue
         if response_text == "API_ERROR" and (
             finish_reason != "False" or finish_reason != "stop"
@@ -227,6 +230,8 @@
             print(f"[response] {result}")
             break
         result += response_text
+        if reasoning:
+            resoning_buffer += reasoning_content
         if observe_window is not None:
             # Observation window: push the data received so far to the display
             if len(observe_window) >= 1:
@@ -241,6 +246,10 @@
             error_msg = chunk_decoded
             logger.error(error_msg)
             raise RuntimeError("JSON parsing failed unexpectedly")
+    if reasoning:
+        # Box the reasoning part with a blockquote marker (>)
+        return '\n'.join(map(lambda x: '> ' + x, resoning_buffer.split('\n'))) + \
+            '\n\n' + result
     return result
 
 def predict(
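The '> ' prefix turns every reasoning line into a Markdown blockquote, so the chain of thought renders as a quoted box above the final answer. A standalone sketch of that formatting rule (inputs invented for illustration):

def quote_reasoning(reasoning: str, answer: str) -> str:
    # Prefix each reasoning line with "> " (Markdown blockquote), then
    # append the final answer after a blank line, as the diff does.
    quoted = '\n'.join('> ' + line for line in reasoning.split('\n'))
    return quoted + '\n\n' + answer

print(quote_reasoning("Let x = 2.\nThen x + 2 = 4.", "The answer is 4."))
# > Let x = 2.
# > Then x + 2 = 4.
#
# The answer is 4.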
@@ -262,6 +271,7 @@ def predict(
     chatbot is the conversation list shown in the WebUI; modify it and then yield it out to update the chat interface directly
     additional_fn indicates which button was clicked; see functional.py for the buttons
     """
+    from .bridge_all import model_info
     if len(APIKEY) == 0:
         raise RuntimeError(f"APIKEY is empty; please check {APIKEY} in the config file")
     if inputs == "":
@@ -298,32 +308,23 @@
         system_prompt=system_prompt,
         temperature=llm_kwargs["temperature"],
     )
+
+    reasoning = model_info[llm_kwargs['llm_model']].get('enable_reasoning', False)
 
     history.append(inputs)
     history.append("")
     retry = 0
     while True:
         try:
-            from .bridge_all import model_info
-
             endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"]
-            if not disable_proxy:
-                response = requests.post(
-                    endpoint,
-                    headers=headers,
-                    proxies=proxies,
-                    json=playload,
-                    stream=True,
-                    timeout=TIMEOUT_SECONDS,
-                )
-            else:
-                response = requests.post(
-                    endpoint,
-                    headers=headers,
-                    json=playload,
-                    stream=True,
-                    timeout=TIMEOUT_SECONDS,
-                )
+            response = requests.post(
+                endpoint,
+                headers=headers,
+                proxies=None if disable_proxy else proxies,
+                json=playload,
+                stream=True,
+                timeout=TIMEOUT_SECONDS,
+            )
             break
         except:
             retry += 1
@@ -338,6 +339,8 @@
             raise TimeoutError
 
     gpt_replying_buffer = ""
+    if reasoning:
+        gpt_reasoning_buffer = ""
 
     stream_response = response.iter_lines()
     while True:
@@ -347,9 +350,9 @@
             break
         except requests.exceptions.ConnectionError:
             chunk = next(stream_response)  # Failed; retry once? If that fails again, there is nothing more to be done.
-        response_text, finish_reason = decode_chunk(chunk)
+        response_text, reasoning_content, finish_reason = decode_chunk(chunk)
         # The first chunk of the returned stream is empty; keep waiting
-        if response_text == "" and finish_reason != "False":
+        if response_text == "" and (reasoning == False or reasoning_content == "") and finish_reason != "False":
             status_text = f"finish_reason: {finish_reason}"
             yield from update_ui(
                 chatbot=chatbot, history=history, msg=status_text
@@ -379,9 +382,14 @@
             logger.info(f"[response] {gpt_replying_buffer}")
             break
         status_text = f"finish_reason: {finish_reason}"
-        gpt_replying_buffer += response_text
-        # If an exception is thrown here, it is usually because the text is too long; see the output of get_full_error for details
-        history[-1] = gpt_replying_buffer
+        if reasoning:
+            gpt_replying_buffer += response_text
+            gpt_reasoning_buffer += reasoning_content
+            history[-1] = '\n'.join(map(lambda x: '> ' + x, gpt_reasoning_buffer.split('\n'))) + '\n\n' + gpt_replying_buffer
+        else:
+            gpt_replying_buffer += response_text
+            # If an exception is thrown here, it is usually because the text is too long; see the output of get_full_error for details
+            history[-1] = gpt_replying_buffer
         chatbot[-1] = (history[-2], history[-1])
         yield from update_ui(
             chatbot=chatbot, history=history, msg=status_text
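In the streaming UI path, history[-1] is rebuilt on every chunk, so the blockquoted reasoning grows first and the answer then fills in beneath it. A minimal sketch of that accumulation (the chunk sequence is invented; deepseek-reasoner emits reasoning deltas before content deltas):

# Invented stream of (response_text, reasoning_content) pairs.
chunks = [
    ("", "Compare 9.11 and 9.8: "),
    ("", "0.11 < 0.80."),
    ("9.8 is larger.", ""),
]

gpt_replying_buffer = ""
gpt_reasoning_buffer = ""
for response_text, reasoning_content in chunks:
    gpt_replying_buffer += response_text
    gpt_reasoning_buffer += reasoning_content
    # Same rendering rule as the diff: quoted reasoning, blank line, answer.
    history_entry = '\n'.join('> ' + l for l in gpt_reasoning_buffer.split('\n')) \
        + '\n\n' + gpt_replying_buffer

print(history_entry)
# > Compare 9.11 and 9.8: 0.11 < 0.80.
#
# 9.8 is larger.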
