@@ -203,17 +203,34 @@ static std::string commandToName(Command::CommandType Type) {
203203}
204204#endif
205205
206- static std::vector<RT::PiEvent>
207- getPiEvents (const std::vector<EventImplPtr> &EventImpls) {
206+ std::vector<RT::PiEvent>
207+ Command:: getPiEvents (const std::vector<EventImplPtr> &EventImpls) const { // Collects the raw PI handles of EventImpls, minus the ones filtered below.
208208 std::vector<RT::PiEvent> RetPiEvents;
209209 for (auto &EventImpl : EventImpls) {
210- if (EventImpl->getHandleRef () != nullptr )
211- RetPiEvents.push_back (EventImpl->getHandleRef ());
210+ if (EventImpl->getHandleRef () == nullptr )
211+ continue ; // Null handle: nothing to add for this event.
212+
213+ // Do not add redundant event dependencies for in-order queues.
214+ // The dependency here is definitely a PI task, so check whether the current
215+ // command is a host task. If it is, the PI event must not be skipped: host
216+ // and PI tasks use different sync mechanisms on an in-order queue.
217+ const QueueImplPtr &WorkerQueue = getWorkerQueue ();
218+ if (EventImpl->getWorkerQueue () == WorkerQueue &&
219+ WorkerQueue->isInOrder () && !isHostTask ())
220+ continue ;
221+
222+ RetPiEvents.push_back (EventImpl->getHandleRef ());
212223 }
213224
214225 return RetPiEvents;
215226}
216227
228+ bool Command::isHostTask () const { // True only for a RUN_CG command whose command group type is CodeplayHostTask.
229+ return (MType == CommandType::RUN_CG) /* host tasks also have this type; && short-circuits, so the cast below is only evaluated for RUN_CG */ &&
230+ ((static_cast <const ExecCGCommand *>(this ))->getCG ().getType () == // NOTE(review): assumes every RUN_CG command is an ExecCGCommand - confirm.
231+ CG::CGTYPE::CodeplayHostTask);
232+ }
233+
217234static void flushCrossQueueDeps (const std::vector<EventImplPtr> &EventImpls,
218235 const QueueImplPtr &Queue) {
219236 for (auto &EventImpl : EventImpls) {
@@ -240,7 +257,8 @@ class DispatchHostTask {
240257 // sophisticated waiting mechanism to allow to utilize this thread for any
241258 // other available job and resume once all required events are ready.
242259 for (auto &PluginWithEvents : RequiredEventsPerPlugin) {
243- std::vector<RT::PiEvent> RawEvents = getPiEvents (PluginWithEvents.second );
260+ std::vector<RT::PiEvent> RawEvents =
261+ MThisCmd->getPiEvents (PluginWithEvents.second );
244262 try {
245263 PluginWithEvents.first ->call <PiApiKind::piEventsWait>(RawEvents.size (),
246264 RawEvents.data ());
@@ -393,10 +411,12 @@ void Command::waitForEvents(QueueImplPtr Queue,
393411Command::Command (CommandType Type, QueueImplPtr Queue)
394412 : MQueue(std::move(Queue)),
395413 MEvent (std::make_shared<detail::event_impl>(MQueue)),
414+ MWorkerQueue(MEvent->getWorkerQueue ()),
396415 MPreparedDepsEvents(MEvent->getPreparedDepsEvents ()),
397416 MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents ()),
398417 MType(Type) {
399418 MSubmittedQueue = MQueue;
419+ MWorkerQueue = MQueue;
400420 MEvent->setCommand (this );
401421 MEvent->setContextImpl (MQueue->getContextImplPtr ());
402422 MEvent->setStateIncomplete ();
@@ -600,12 +620,6 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep,
600620
601621 Command *ConnectionCmd = nullptr ;
602622
603- // Do not add redundant event dependencies for in-order queues.
604- if (Dep.MDepCommand && Dep.MDepCommand ->getWorkerQueue () == WorkerQueue &&
605- WorkerQueue->has_property <property::queue::in_order>() &&
606- getType () != CommandType::HOST_TASK)
607- return nullptr ;
608-
609623 ContextImplPtr DepEventContext = DepEvent->getContextImpl ();
610624 // If contexts don't match we'll connect them using host task
611625 if (DepEventContext != WorkerContext && !WorkerContext->is_host ()) {
@@ -621,14 +635,14 @@ const ContextImplPtr &Command::getWorkerContext() const {
621635 return MQueue->getContextImplPtr ();
622636}
623637
624- const QueueImplPtr &Command::getWorkerQueue () const { return MQueue; }
638+ const QueueImplPtr &Command::getWorkerQueue () const { // Returns the stored MWorkerQueue instead of MQueue.
639+ assert (MWorkerQueue && " MWorkerQueue must not be nullptr" ); // The string literal is always truthy; it only labels the assertion message.
640+ return MWorkerQueue;
641+ }
625642
626643bool Command::producesPiEvent () const { return true ; } // Unconditionally true here; MemCpyCommand defines its own version further down.
627644
628- bool Command::supportsPostEnqueueCleanup () const {
629- // Isolated commands are cleaned up separately
630- return !MUsers.empty () || !MDeps.empty ();
631- }
645+ bool Command::supportsPostEnqueueCleanup () const { return true ; } // Now unconditional; the removed version returned false when both MUsers and MDeps were empty.
632646
633647Command *Command::addDep (DepDesc NewDep, std::vector<Command *> &ToCleanUp) {
634648 Command *ConnectionCmd = nullptr ;
@@ -1298,6 +1312,9 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq,
12981312 if (!MSrcQueue->is_host ()) {
12991313 MEvent->setContextImpl (MSrcQueue->getContextImplPtr ());
13001314 }
1315+
1316+ MWorkerQueue = MQueue->is_host () ? MSrcQueue : MQueue;
1317+
13011318 emitInstrumentationDataProxy ();
13021319}
13031320
@@ -1335,10 +1352,6 @@ const ContextImplPtr &MemCpyCommand::getWorkerContext() const {
13351352 return getWorkerQueue ()->getContextImplPtr ();
13361353}
13371354
1338- const QueueImplPtr &MemCpyCommand::getWorkerQueue () const {
1339- return MQueue->is_host () ? MSrcQueue : MQueue;
1340- }
1341-
13421355bool MemCpyCommand::producesPiEvent () const {
13431356 // TODO remove this workaround once the batching issue is addressed in Level
13441357 // Zero plugin.
@@ -1481,6 +1494,8 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq,
14811494 MEvent->setContextImpl (MSrcQueue->getContextImplPtr ());
14821495 }
14831496
1497+ MWorkerQueue = MQueue->is_host () ? MSrcQueue : MQueue;
1498+
14841499 emitInstrumentationDataProxy ();
14851500}
14861501
@@ -1518,10 +1533,6 @@ const ContextImplPtr &MemCpyCommandHost::getWorkerContext() const {
15181533 return getWorkerQueue ()->getContextImplPtr ();
15191534}
15201535
1521- const QueueImplPtr &MemCpyCommandHost::getWorkerQueue () const {
1522- return MQueue->is_host () ? MSrcQueue : MQueue;
1523- }
1524-
15251536pi_int32 MemCpyCommandHost::enqueueImp () {
15261537 const QueueImplPtr &Queue = getWorkerQueue ();
15271538 waitForPreparedHostEvents ();
0 commit comments