@@ -28,7 +28,7 @@ void allocate_tensor_impl(ov::SoPtr<ov::ITensor>& tensor,
 LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const LlamaCppModel>& compiled_model,
                                                    size_t num_threads)
     : ov::ISyncInferRequest(compiled_model) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: infer request ctor called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: infer request ctor called\n");
     llama_context_params cparams = llama_context_default_params();
     cparams.n_threads = num_threads ? num_threads : std::thread::hardware_concurrency();
     cparams.n_ctx = 0;  // this means that the actual n_ctx will be taken equal to the model's train-time value
@@ -51,7 +51,7 @@ LlamaCppSyncInferRequest::LlamaCppSyncInferRequest(const std::shared_ptr<const L
 }
 void LlamaCppSyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port,
                                                 const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
-    OPENVINO_DEBUG << "llama_cpp_plugin: set_tensors_impl called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: set_tensors_impl called\n");
 }

 void llama_batch_add_reimpl(struct llama_batch& batch,
@@ -131,12 +131,12 @@ void LlamaCppSyncInferRequest::infer() {
     llama_batch_free(batch);
 };
 std::vector<ov::ProfilingInfo> LlamaCppSyncInferRequest::get_profiling_info() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: get_profiling_info() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: get_profiling_info() called\n");
     return std::vector<ov::ProfilingInfo>{};
 };

 std::vector<ov::SoPtr<ov::IVariableState>> LlamaCppSyncInferRequest::query_state() const {
-    OPENVINO_DEBUG << "llama_cpp_plugin: query_state() called\n";
+    OPENVINO_DEBUG("llama_cpp_plugin: query_state() called\n");
     return {std::static_pointer_cast<ov::IVariableState>(std::make_shared<LlamaCppState>(m_llama_ctx))};
 }
