-
Notifications
You must be signed in to change notification settings - Fork 2k
Added support for the "think" for Ollama #3386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
524f12c
af9958b
91fb151
f77e08a
b188adc
552a346
557a98e
afc3ba2
3411fe2
abfc9e1
f61a99f
2f9417a
dfb2522
b8a1115
64bee7b
faa8b7c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
| * | ||
| * @author Siarhei Blashuk | ||
| * @author Thomas Vitale | ||
| * @author Sun Yuhan | ||
| * @since 1.0.0 | ||
| */ | ||
| public enum OllamaModel implements ChatModelDescription { | ||
|
|
@@ -32,6 +33,21 @@ public enum OllamaModel implements ChatModelDescription { | |
| */ | ||
| QWEN_2_5_7B("qwen2.5"), | ||
|
|
||
| /** | ||
| * Qwen3 | ||
| */ | ||
| QWEN_3_8B("qwen3"), | ||
|
|
||
| /** | ||
| * Qwen3 1.7b | ||
| */ | ||
| QWEN_3_1_7_B("qwen3:1.7b"), | ||
|
|
||
| /** | ||
| * Qwen3 0.6b | ||
| */ | ||
| QWEN_3_06B("qwen3:0.6b"), | ||
|
|
||
|
Comment on lines
+56
to
+64
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Among the existing constants, there is QWEN3_4B, while the newly added ones are QWEN_3_1_7_B and QWEN_3_06B. The naming styles are inconsistent (such as the use of underscores and the separation of numbers and units). It is recommended to adopt a unified style (for example, using QWEN3_1_7B / QWEN3_0_6B consistently, or applying a uniform underscore scheme) to ensure readability and searchability. |
||
| /** | ||
| * QwQ is the reasoning model of the Qwen series. | ||
| */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,14 +33,16 @@ | |
| import org.springframework.ai.ollama.api.OllamaApi.Message.Role; | ||
|
|
||
| import static org.assertj.core.api.Assertions.assertThat; | ||
| import static org.junit.jupiter.api.Assertions.assertNull; | ||
|
|
||
| /** | ||
| * @author Christian Tzolov | ||
| * @author Thomas Vitale | ||
| * @author Sun Yuhan | ||
| */ | ||
| public class OllamaApiIT extends BaseOllamaIT { | ||
|
|
||
| private static final String MODEL = OllamaModel.LLAMA3_2.getName(); | ||
| private static final String MODEL = OllamaModel.QWEN_3_1_7_B.getName(); | ||
|
|
||
| @BeforeAll | ||
| public static void beforeAll() throws IOException, InterruptedException { | ||
|
|
@@ -107,11 +109,67 @@ public void embedText() { | |
|
|
||
| assertThat(response).isNotNull(); | ||
| assertThat(response.embeddings()).hasSize(1); | ||
| assertThat(response.embeddings().get(0)).hasSize(3072); | ||
| assertThat(response.embeddings().get(0)).hasSize(2048); | ||
| assertThat(response.model()).isEqualTo(MODEL); | ||
| assertThat(response.promptEvalCount()).isEqualTo(5); | ||
| assertThat(response.loadDuration()).isGreaterThan(1); | ||
| assertThat(response.totalDuration()).isGreaterThan(1); | ||
| } | ||
|
|
||
| @Test | ||
| public void streamChatWithThinking() { | ||
| var request = ChatRequest.builder(MODEL) | ||
| .stream(true) | ||
| .think(true) | ||
| .messages(List.of(Message.builder(Role.USER).content("What are the planets in the solar system?").build())) | ||
| .options(OllamaOptions.builder().temperature(0.9).build().toMap()) | ||
| .build(); | ||
|
|
||
| Flux<ChatResponse> response = getOllamaApi().streamingChat(request); | ||
|
|
||
| List<ChatResponse> responses = response.collectList().block(); | ||
| System.out.println(responses); | ||
|
|
||
| assertThat(responses).isNotNull(); | ||
| assertThat(responses.stream() | ||
| .filter(r -> r.message() != null) | ||
| .map(r -> r.message().thinking()) | ||
| .collect(Collectors.joining(System.lineSeparator()))).contains("solar"); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This assertion is failing for me. |
||
|
|
||
| ChatResponse lastResponse = responses.get(responses.size() - 1); | ||
| assertThat(lastResponse.message().content()).isEmpty(); | ||
| assertNull(lastResponse.message().thinking()); | ||
| assertThat(lastResponse.done()).isTrue(); | ||
| } | ||
|
|
||
| @Test | ||
| public void streamChatWithoutThinking() { | ||
| var request = ChatRequest.builder(MODEL) | ||
| .stream(true) | ||
| .think(false) | ||
| .messages(List.of(Message.builder(Role.USER).content("What are the planets in the solar system?").build())) | ||
| .options(OllamaOptions.builder().temperature(0.9).build().toMap()) | ||
| .build(); | ||
|
|
||
| Flux<ChatResponse> response = getOllamaApi().streamingChat(request); | ||
|
|
||
| List<ChatResponse> responses = response.collectList().block(); | ||
| System.out.println(responses); | ||
|
|
||
| assertThat(responses).isNotNull(); | ||
|
|
||
| assertThat(responses.stream() | ||
| .filter(r -> r.message() != null) | ||
| .map(r -> r.message().content()) | ||
| .collect(Collectors.joining(System.lineSeparator()))).contains("Earth"); | ||
|
|
||
| assertThat(responses.stream().filter(r -> r.message() != null).allMatch(r -> r.message().thinking() == null)) | ||
| .isTrue(); | ||
|
|
||
| ChatResponse lastResponse = responses.get(responses.size() - 1); | ||
| assertThat(lastResponse.message().content()).isEmpty(); | ||
| assertNull(lastResponse.message().thinking()); | ||
| assertThat(lastResponse.done()).isTrue(); | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When `think` is null, ensure it is not serialized into the JSON payload at all, so that the server's default behavior is triggered. It is recommended to set the `think` field only when it has a non-null value.