Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
1ee373e
Enable CB for vlms with multiple images and multiple prompts
mamtsing Oct 5, 2025
e9cf657
update text_generation_interface
mamtsing Oct 7, 2025
f62f71e
Updated text_generation to run CB for VLMs
asmigosw Oct 23, 2025
39f5574
Ruff format
asmigosw Oct 23, 2025
ecd5905
Updated qwen2_5 modelling for CB support
asmigosw Oct 24, 2025
3d9cd49
Updated qwen2_5 modelling for CB support
asmigosw Oct 24, 2025
165c8fb
Passed image queue in decode CB
asmigosw Oct 24, 2025
cf2d4fb
Ruff format
asmigosw Oct 24, 2025
d97bda9
refactored the code
quic-rishinr Oct 27, 2025
e663cd6
Qwen2.5vl CB Update
Oct 28, 2025
799af59
Lint fix and code cleaning
quic-rishinr Oct 28, 2025
621e3a8
fix for fbs >1
quic-rishinr Oct 28, 2025
59dff46
Updated cloud_ai_100_exec_kv call in modelling_auto
quic-rishinr Oct 29, 2025
fc69e3a
Removed redundant vision execution and refactoring
quic-rishinr Oct 30, 2025
275d4cd
nit: update QBlocking to LM Attention in Qwen2.5VL
Oct 30, 2025
e8253ff
nit: update readme for qblocking in example file and lint/format checks
Oct 30, 2025
4d9afe2
nit: lint/format checks
Oct 30, 2025
6b079fe
CI failure fix
quic-rishinr Oct 30, 2025
64dffdd
qwen2_5_vl inference changes
Oct 30, 2025
ca0cc03
Removed CB-related kwargs for functions in non-CB models
quic-rishinr Oct 31, 2025
b0ee5a4
Added caching for vision outputs
quic-rishinr Oct 31, 2025
a0a80c0
Added change for popping prompt while running continuous batching
quic-rishinr Nov 3, 2025
0892480
Adding multi_frame modeling changes and some fix
Nov 3, 2025
875c4d2
Addressed review comments
quic-rishinr Nov 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions QEfficient/generation/cloud_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ def __init__(
self.program = qaicrt.Program(self.context, None, qpc, prog_properties)
if self.program.load() != qaicrt.QStatus.QS_SUCCESS:
raise RuntimeError("Failed to load program")
self.is_active = False
if activate:
self.activate()
self.is_active = True
# Create input qbuffers and buf_dims
self.qbuffers = [qaicrt.QBuffer(bytes(binding.size)) for binding in self.bindings]
self.buf_dims = qaicrt.BufferDimensionsVecRef(
Expand All @@ -108,15 +110,17 @@ def output_names(self) -> List[str]:

def activate(self):
"""Activate qpc"""

self.program.activate()
self.execObj = qaicrt.ExecObj(self.context, self.program)
if not self.is_active:
self.program.activate()
self.execObj = qaicrt.ExecObj(self.context, self.program)
self.is_active = True

def deactivate(self):
"""Deactivate qpc"""

del self.execObj
self.program.deactivate()
if self.is_active:
del self.execObj
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: we should always declare an explicit cleanup method for execObj; otherwise it may lead to silent memory leaks. Just deleting the object is not enough and may leave hidden memory leaks.

self.program.deactivate()
self.is_active = False

def set_buffers(self, buffers: Dict[str, np.ndarray]):
"""
Expand Down
Loading
Loading