
Commit eeb2b19

docs: add LLM sample
1 parent e96f866 commit eeb2b19

File tree

7 files changed, +303 −0 lines changed


.license-ignore

Lines changed: 1 addition & 0 deletions
@@ -18,3 +18,4 @@ requirements-dev.txt
 .gitignore
 .license-ignore
 ./samples/proto/*
+./samples/llm/*

samples/README.md

Lines changed: 1 addition & 0 deletions
@@ -6,4 +6,5 @@
 2. [**serialization**](./serialization): Writing and using custom serialization functions, including protobuf, JSON, and more.
 3. [**stream**](./stream): Using streaming calls, including `ClientStream`, `ServerStream`, and `BidirectionalStream`.
 4. [**registry**](./registry): Using service registration and discovery features.
+5. [**llm**](./llm): Integrating LLMs with Dubbo Python and exposing models such as DeepSeek R1 as RPC services.

samples/llm/README.md

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
## Integrating LLM

Dubbo Python integrates easily with LLMs, exposing them as RPC services.

- **Model**: DeepSeek-R1-Distill-Qwen-7B
- **Model Deployment Framework**: LMDeploy
- **GPU**: NVIDIA Corporation GA102GL [A10] (rev a1)

**Description**: This example uses [DeepSeek R1](https://github.com/deepseek-ai/DeepSeek-R1) deployed with [LMDeploy](https://github.com/InternLM/lmdeploy), but the overall process applies to other models and inference frameworks as well. To deploy with Docker or another containerization method, refer to the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) documentation for the relevant configuration steps.

### Basic Environment

```sh
----------
Operating System: Ubuntu 22.04.5
Python Version: 3.11.10
PyTorch Version: 2.5.1
----------
```
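
The sample additionally needs LMDeploy, ModelScope, and Dubbo Python itself. A minimal setup sketch follows; the exact package sources are assumptions, so check each project's installation guide:

```sh
# Assumed installation steps; verify against each project's docs.
pip install lmdeploy modelscope

# Dubbo Python, installed from source (assumption: current main branch)
git clone https://github.com/apache/dubbo-python.git
cd dubbo-python && pip install .

# The sample registers with Zookeeper; one way to run it locally (assumption)
docker run -d --name zookeeper -p 2181:2181 zookeeper:3.8
```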

### Model Download

Use the `snapshot_download` function provided by ModelScope to download the model. The first parameter is the model name, and the `cache_dir` parameter specifies the download path for the model.

```python
from modelscope import snapshot_download

model_dir = snapshot_download('deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', cache_dir='/home/dubbo/model', revision='master')
```
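
Since `snapshot_download` returns the local snapshot directory, the server below can point LMDeploy at that path instead of the model id, avoiding a re-download at startup (a hedged note; the path shown assumes the `cache_dir` layout above):

```python
# Use the local snapshot rather than the remote model id (assumed cache layout).
model = model_dir  # e.g. '/home/dubbo/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
```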

### Core Code

The server reads a single `ChatRequest` from the stream, runs streaming inference with LMDeploy, and writes each generated chunk back as a `ChatReply`, routing text between `<think>` and `</think>` markers to the `think` field and everything else to `answer`.

```python
from time import sleep

from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline

from dubbo import Dubbo
from dubbo.configs import RegistryConfig, ServiceConfig
from dubbo.proxy.handlers import RpcMethodHandler, RpcServiceHandler
import chat_pb2

# The path of a model. It can be one of the following options:
# 1. A local directory path of a turbomind model
# 2. The model_id of an lmdeploy-quantized model
# 3. The model_id of a model hosted in a model repository
model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

backend_config = TurbomindEngineConfig(cache_max_entry_count=0.2, max_context_token_num=20544, session_len=20544)

gen_config = GenerationConfig(
    top_p=0.95,
    temperature=0.6,
    max_new_tokens=8192,
    stop_token_ids=[151329, 151336, 151338],
    do_sample=True,  # enable sampling
)


class DeepSeekAiServicer:
    def __init__(self, model: str, backend_config: TurbomindEngineConfig, gen_config: GenerationConfig):
        self.llm = pipeline(model, backend_config=backend_config)
        self.gen_config = gen_config

    def chat(self, stream):
        # read the request from the stream
        request = stream.read()
        print(f"Received request: {request}")
        # prepare prompts; appending "<think>\n" steers the model into reasoning mode
        prompts = [{"role": request.role, "content": request.content + "<think>\n"}]

        is_think = False

        # perform streaming inference
        for item in self.llm.stream_infer(prompts, gen_config=self.gen_config):
            # track whether we are inside a <think> block
            if item.text == "<think>":
                is_think = True
                continue
            elif item.text == "</think>":
                is_think = False
                continue
            # route the chunk according to the current think state
            if is_think:
                # send reasoning content
                stream.write(chat_pb2.ChatReply(think=item.text, answer=""))
            else:
                # send answer content
                stream.write(chat_pb2.ChatReply(think="", answer=item.text))

        stream.done_writing()


def build_server_handler():
    # build a method handler for the server-streaming "chat" method
    deepseek_ai_servicer = DeepSeekAiServicer(model, backend_config, gen_config)
    method_handler = RpcMethodHandler.server_stream(
        deepseek_ai_servicer.chat,
        method_name="chat",
        request_deserializer=chat_pb2.ChatRequest.FromString,
        response_serializer=chat_pb2.ChatReply.SerializeToString,
    )
    # build a service handler
    service_handler = RpcServiceHandler(
        service_name="org.apache.dubbo.samples.llm.api.DeepSeekAiService",
        method_handlers=[method_handler],
    )
    return service_handler


if __name__ == "__main__":
    # build a service handler
    service_handler = build_server_handler()
    service_config = ServiceConfig(service_handler=service_handler)

    # configure the Zookeeper registry
    registry_config = RegistryConfig.from_url("zookeeper://zookeeper:2181")
    bootstrap = Dubbo(registry_config=registry_config)

    # create and start the server
    bootstrap.create_server(service_config).start()

    # keep the process alive (30 days)
    sleep(30 * 24 * 60 * 60)
```
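
For completeness, a minimal client sketch. This is an assumption modeled on the repository's stream samples: `ReferenceConfig.from_url`, `create_client`, and `client.server_stream` are taken from those samples and may differ across versions.

```python
from dubbo import Dubbo
from dubbo.configs import ReferenceConfig

import chat_pb2

# assumption: direct connection over the triple protocol, mirroring the stream sample
reference_config = ReferenceConfig.from_url(
    "tri://127.0.0.1:50051/org.apache.dubbo.samples.llm.api.DeepSeekAiService"
)
client = Dubbo().create_client(reference_config)

# create a server-streaming proxy for the "chat" method
chat = client.server_stream(
    method_name="chat",
    request_serializer=chat_pb2.ChatRequest.SerializeToString,
    response_deserializer=chat_pb2.ChatReply.FromString,
)

# send one request and consume the streamed reply chunks
stream = chat(chat_pb2.ChatRequest(role="user", content="What is Apache Dubbo?"))
for reply in stream:
    if reply.think:
        print(f"[think] {reply.think}", end="", flush=True)
    else:
        print(reply.answer, end="", flush=True)
```

Each `ChatReply` carries exactly one non-empty field, so the client can separate the model's reasoning trace from its final answer as the chunks arrive.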

samples/llm/__init__.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

samples/llm/chat.proto

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
syntax = "proto3";

option java_multiple_files = true;
option java_outer_classname = "ChatProto";

package org.apache.dubbo.samples.llm.api;

message ChatRequest {
  string role = 1;
  string content = 2;
}

message ChatReply {
  string think = 1;
  string answer = 2;
}

service DeepSeekAiService {
  // chat
  rpc chat(ChatRequest) returns (stream ChatReply);
}
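
The checked-in `chat_pb2.py` is generated from this definition. A plausible regeneration command, assuming `grpcio-tools` (or a standalone `protoc`) is installed:

```sh
# Regenerate chat_pb2.py from chat.proto (assumes grpcio-tools is available)
python -m grpc_tools.protoc -I samples/llm --python_out=samples/llm samples/llm/chat.proto
```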

samples/llm/chat_pb2.py

Lines changed: 32 additions & 0 deletions
Generated file; diff not rendered by default.

samples/llm/main.py

Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from time import sleep

from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline

from dubbo import Dubbo
from dubbo.configs import RegistryConfig, ServiceConfig
from dubbo.proxy.handlers import RpcMethodHandler, RpcServiceHandler
import chat_pb2

# The path of a model. It can be one of the following options:
# 1. A local directory path of a turbomind model
# 2. The model_id of an lmdeploy-quantized model
# 3. The model_id of a model hosted in a model repository
model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

backend_config = TurbomindEngineConfig(cache_max_entry_count=0.2, max_context_token_num=20544, session_len=20544)

gen_config = GenerationConfig(
    top_p=0.95,
    temperature=0.6,
    max_new_tokens=8192,
    stop_token_ids=[151329, 151336, 151338],
    do_sample=True,  # enable sampling
)


class DeepSeekAiServicer:
    def __init__(self, model: str, backend_config: TurbomindEngineConfig, gen_config: GenerationConfig):
        self.llm = pipeline(model, backend_config=backend_config)
        self.gen_config = gen_config

    def chat(self, stream):
        # read the request from the stream
        request = stream.read()
        print(f"Received request: {request}")
        # prepare prompts; appending "<think>\n" steers the model into reasoning mode
        prompts = [{"role": request.role, "content": request.content + "<think>\n"}]

        is_think = False

        # perform streaming inference
        for item in self.llm.stream_infer(prompts, gen_config=self.gen_config):
            # track whether we are inside a <think> block
            if item.text == "<think>":
                is_think = True
                continue
            elif item.text == "</think>":
                is_think = False
                continue
            # route the chunk according to the current think state
            if is_think:
                # send reasoning content
                stream.write(chat_pb2.ChatReply(think=item.text, answer=""))
            else:
                # send answer content
                stream.write(chat_pb2.ChatReply(think="", answer=item.text))

        stream.done_writing()


def build_server_handler():
    # build a method handler for the server-streaming "chat" method
    deepseek_ai_servicer = DeepSeekAiServicer(model, backend_config, gen_config)
    method_handler = RpcMethodHandler.server_stream(
        deepseek_ai_servicer.chat,
        method_name="chat",
        request_deserializer=chat_pb2.ChatRequest.FromString,
        response_serializer=chat_pb2.ChatReply.SerializeToString,
    )
    # build a service handler
    service_handler = RpcServiceHandler(
        service_name="org.apache.dubbo.samples.llm.api.DeepSeekAiService",
        method_handlers=[method_handler],
    )
    return service_handler


if __name__ == "__main__":
    # build a service handler
    service_handler = build_server_handler()
    service_config = ServiceConfig(service_handler=service_handler)

    # configure the Zookeeper registry
    registry_config = RegistryConfig.from_url("zookeeper://zookeeper:2181")
    bootstrap = Dubbo(registry_config=registry_config)

    # create and start the server
    bootstrap.create_server(service_config).start()

    # keep the process alive (30 days)
    sleep(30 * 24 * 60 * 60)
