Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import paddle.jit.dy2static.utils as jit_utils
import paddle.nn.layer
from paddle.base.core import CUDAGraph
from paddle.device.cuda import graphs

from fastdeploy import envs
Expand Down Expand Up @@ -93,7 +92,10 @@ def __init__(self, fd_config: FDConfig, runnable: Callable):
self.warm_up_size = fd_config.graph_opt_config.cudagraph_num_of_warmups
self.real_shape_to_captured_size = fd_config.graph_opt_config.real_shape_to_captured_size
if self.fd_config.graph_opt_config.use_unique_memory_pool:
self.unique_memory_pool_id = CUDAGraph.gen_new_memory_pool_id()
if paddle.is_compiled_with_cuda():
from paddle.base.core import CUDAGraph

self.unique_memory_pool_id = CUDAGraph.gen_new_memory_pool_id()
self._create_entry_dict()

self.cuda_graph_manager = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,11 @@ def apply(
"""
if layer.ep_size > 1:
if layer.fd_config.parallel_config.moe_phase.phase == "prefill":
if layer.fd_config.parallel_config.splitwise_role == "mixed":
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个条件（`splitwise_role == "mixed"`）也还保留着。

if layer.layer_idx == 0:
self.ep_prefill_runner.clean_low_latency_buffer()
return self.apply_ep_prefill(layer, x, gate)
else:
if layer.fd_config.parallel_config.splitwise_role == "mixed":
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

同上：这里的 `splitwise_role == "mixed"` 条件也还保留着。

if layer.layer_idx == 0:
self.ep_decoder_runner.clean_low_latency_buffer()
return self.apply_ep_decode(layer, x, gate)
else:
Expand Down
Loading