@@ -36,10 +36,11 @@ def get_full_error(chunk, stream_response):
 
 def decode_chunk(chunk):
     """
-    Decode the "content" and "finish_reason" fields
+    Decode the "content" and "finish_reason" fields (and "reasoning_content" when the model supports chain-of-thought)
     """
     chunk = chunk.decode()
     respose = ""
+    reasoning_content = ""
     finish_reason = "False"
     try:
         chunk = json.loads(chunk[6:])
@@ -57,14 +58,20 @@ def decode_chunk(chunk):
         return respose, finish_reason
 
     try:
-        respose = chunk["choices"][0]["delta"]["content"]
+        if chunk["choices"][0]["delta"]["content"] is not None:
+            respose = chunk["choices"][0]["delta"]["content"]
+    except:
+        pass
+    try:
+        if chunk["choices"][0]["delta"]["reasoning_content"] is not None:
+            reasoning_content = chunk["choices"][0]["delta"]["reasoning_content"]
     except:
         pass
     try:
         finish_reason = chunk["choices"][0]["finish_reason"]
     except:
         pass
-    return respose, finish_reason
+    return respose, reasoning_content, finish_reason
 
 
 def generate_message(input, model, key, history, max_output_token, system_prompt, temperature):
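With this change, `decode_chunk` returns a three-tuple and tolerates deltas whose `content` is absent or `null`. A minimal sketch of the new contract, using hypothetical OpenAI-style SSE payloads (the 6-character `data: ` prefix is what `chunk[6:]` strips above):

```python
import json

# Hypothetical SSE chunks, modeled on OpenAI-compatible streaming deltas.
content_chunk = b"data: " + json.dumps(
    {"choices": [{"delta": {"content": "Hello"}}]}
).encode()
reasoning_chunk = b"data: " + json.dumps(
    {"choices": [{"delta": {"content": None, "reasoning_content": "thinking..."}}]}
).encode()

# Expected results under the patched decode_chunk ("False" is its
# sentinel for "no finish_reason seen in this delta"):
#   decode_chunk(content_chunk)   -> ("Hello", "", "False")
#   decode_chunk(reasoning_chunk) -> ("", "thinking...", "False")
```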
@@ -149,6 +156,7 @@ def predict_no_ui_long_connection(
         observe_window = None:
         Used to pass the partial output across threads; most of the time this is only for a fancy visual effect and can be left empty. observe_window[0]: observation window. observe_window[1]: watchdog.
     """
+    from .bridge_all import model_info
     watch_dog_patience = 5  # watchdog patience: no biting for the first 5 seconds (it is not people being bitten anyway)
     if len(APIKEY) == 0:
         raise RuntimeError(f"APIKEY is empty; please check {APIKEY} in the config file")
@@ -163,29 +171,21 @@ def predict_no_ui_long_connection(
         system_prompt=sys_prompt,
         temperature=llm_kwargs["temperature"],
     )
+
+    reasoning = model_info[llm_kwargs['llm_model']].get('enable_reasoning', False)
+
     retry = 0
     while True:
         try:
-            from .bridge_all import model_info
-
             endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"]
-            if not disable_proxy:
-                response = requests.post(
-                    endpoint,
-                    headers=headers,
-                    proxies=proxies,
-                    json=playload,
-                    stream=True,
-                    timeout=TIMEOUT_SECONDS,
-                )
-            else:
-                response = requests.post(
-                    endpoint,
-                    headers=headers,
-                    json=playload,
-                    stream=True,
-                    timeout=TIMEOUT_SECONDS,
-                )
+            response = requests.post(
+                endpoint,
+                headers=headers,
+                proxies=None if disable_proxy else proxies,
+                json=playload,
+                stream=True,
+                timeout=TIMEOUT_SECONDS,
+            )
             break
         except:
             retry += 1
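The two near-identical `requests.post` branches collapse into one call: `None` is already the default value of the `proxies` parameter in `requests`, so passing `proxies=None` behaves exactly like omitting the argument, which is what the removed `else` branch did. A quick check (uses httpbin.org purely as a reachable test endpoint):

```python
import requests

# proxies=None is the library default, so both calls send an un-proxied request.
r1 = requests.get("https://httpbin.org/get", proxies=None, timeout=10)
r2 = requests.get("https://httpbin.org/get", timeout=10)
assert r1.status_code == r2.status_code
```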
@@ -194,10 +194,13 @@ def predict_no_ui_long_connection(
                 raise TimeoutError
             if MAX_RETRY != 0:
                 logger.error(f"Request timed out, retrying ({retry}/{MAX_RETRY}) ...")
-
-    stream_response = response.iter_lines()
+
     result = ""
     finish_reason = ""
+    if reasoning:
+        resoning_buffer = ""
+
+    stream_response = response.iter_lines()
     while True:
         try:
             chunk = next(stream_response)
@@ -207,9 +210,9 @@ def predict_no_ui_long_connection(
             break
         except requests.exceptions.ConnectionError:
             chunk = next(stream_response)  # Failed; retry once more? If that fails too, nothing more can be done.
-        response_text, finish_reason = decode_chunk(chunk)
+        response_text, reasoning_content, finish_reason = decode_chunk(chunk)
         # The first chunk of the returned stream is empty; keep waiting
-        if response_text == "" and finish_reason != "False":
+        if response_text == "" and (reasoning == False or reasoning_content == "") and finish_reason != "False":
             continue
         if response_text == "API_ERROR" and (
             finish_reason != "False" or finish_reason != "stop"
@@ -227,6 +230,8 @@ def predict_no_ui_long_connection(
             print(f"[response] {result}")
             break
         result += response_text
+        if reasoning:
+            resoning_buffer += reasoning_content
         if observe_window is not None:
             # Observation window: push out the data received so far
             if len(observe_window) >= 1:
@@ -241,6 +246,10 @@ def predict_no_ui_long_connection(
             error_msg = chunk_decoded
             logger.error(error_msg)
             raise RuntimeError("Unexpected JSON structure")
+    if reasoning:
+        # wrap the reasoning part in a blockquote (>)
+        return '\n'.join(map(lambda x: '> ' + x, resoning_buffer.split('\n'))) + \
+            '\n\n' + result
     return result
 
 def predict(
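Before being returned, the accumulated reasoning trace is prefixed line-by-line with `> `, so it renders as a Markdown blockquote above the final answer. A runnable illustration of that formatting step:

```python
resoning_buffer = "First, restate the problem.\nThen, check edge cases."
result = "The answer is 42."

quoted = "\n".join(map(lambda x: "> " + x, resoning_buffer.split("\n")))
print(quoted + "\n\n" + result)
# > First, restate the problem.
# > Then, check edge cases.
#
# The answer is 42.
```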
@@ -262,6 +271,7 @@ def predict(
     chatbot is the conversation list shown in the WebUI; modify it and then yield to update the chat interface directly
     additional_fn indicates which button was clicked; see functional.py for the buttons
     """
+    from .bridge_all import model_info
     if len(APIKEY) == 0:
         raise RuntimeError(f"APIKEY is empty; please check {APIKEY} in the config file")
     if inputs == "":
@@ -298,32 +308,23 @@ def predict(
         system_prompt=system_prompt,
         temperature=llm_kwargs["temperature"],
     )
+
+    reasoning = model_info[llm_kwargs['llm_model']].get('enable_reasoning', False)
 
     history.append(inputs)
     history.append("")
     retry = 0
     while True:
         try:
-            from .bridge_all import model_info
-
             endpoint = model_info[llm_kwargs["llm_model"]]["endpoint"]
-            if not disable_proxy:
-                response = requests.post(
-                    endpoint,
-                    headers=headers,
-                    proxies=proxies,
-                    json=playload,
-                    stream=True,
-                    timeout=TIMEOUT_SECONDS,
-                )
-            else:
-                response = requests.post(
-                    endpoint,
-                    headers=headers,
-                    json=playload,
-                    stream=True,
-                    timeout=TIMEOUT_SECONDS,
-                )
+            response = requests.post(
+                endpoint,
+                headers=headers,
+                proxies=None if disable_proxy else proxies,
+                json=playload,
+                stream=True,
+                timeout=TIMEOUT_SECONDS,
+            )
             break
         except:
             retry += 1
@@ -338,6 +339,8 @@ def predict(
             raise TimeoutError
 
     gpt_replying_buffer = ""
+    if reasoning:
+        gpt_reasoning_buffer = ""
 
     stream_response = response.iter_lines()
     while True:
@@ -347,9 +350,9 @@ def predict(
             break
         except requests.exceptions.ConnectionError:
             chunk = next(stream_response)  # Failed; retry once more? If that fails too, nothing more can be done.
-        response_text, finish_reason = decode_chunk(chunk)
+        response_text, reasoning_content, finish_reason = decode_chunk(chunk)
         # The first chunk of the returned stream is empty; keep waiting
-        if response_text == "" and finish_reason != "False":
+        if response_text == "" and (reasoning == False or reasoning_content == "") and finish_reason != "False":
             status_text = f"finish_reason: {finish_reason}"
             yield from update_ui(
                 chatbot=chatbot, history=history, msg=status_text
@@ -379,9 +382,14 @@ def predict(
             logger.info(f"[response] {gpt_replying_buffer}")
             break
         status_text = f"finish_reason: {finish_reason}"
-        gpt_replying_buffer += response_text
-        # If an exception is raised here, it is usually because the text is too long; see the output of get_full_error for details
-        history[-1] = gpt_replying_buffer
+        if reasoning:
+            gpt_replying_buffer += response_text
+            gpt_reasoning_buffer += reasoning_content
+            history[-1] = '\n'.join(map(lambda x: '> ' + x, gpt_reasoning_buffer.split('\n'))) + '\n\n' + gpt_replying_buffer
+        else:
+            gpt_replying_buffer += response_text
+            # If an exception is raised here, it is usually because the text is too long; see the output of get_full_error for details
+            history[-1] = gpt_replying_buffer
         chatbot[-1] = (history[-2], history[-1])
         yield from update_ui(
             chatbot=chatbot, history=history, msg=status_text
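Note that the streaming path re-quotes the entire reasoning buffer on every chunk instead of appending quoted deltas; this keeps the blockquote well-formed even when a line of reasoning arrives split across two deltas. A small sketch of why (the deltas below are hypothetical):

```python
def render(reasoning_buffer, replying_buffer):
    # Rebuild the full blockquote from scratch on each update.
    quoted = "\n".join("> " + line for line in reasoning_buffer.split("\n"))
    return quoted + "\n\n" + replying_buffer

buf = ""
for delta in ["Step 1\nSte", "p 2"]:  # one reasoning line split across deltas
    buf += delta
    print(render(buf, "..."))  # every intermediate render is valid Markdown
```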