
Commit 032d6db: Fix lint issues
Parent: e0ab94b

File tree: 9 files changed (+62, -68 lines)


onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 2 additions & 2 deletions
@@ -113,7 +113,7 @@ BackendManager::BackendManager(SessionContext& session_context,
  subgraph_context_.has_dynamic_input_shape = true;
  LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
  if (cpu_or_gpu || (npu && session_context_.enable_causallm) &&
- !session_context_.disable_dynamic_shapes) {
+ !session_context_.disable_dynamic_shapes) {
  LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
  << "Creating backend Dynamic Shapes";
  try {
@@ -448,7 +448,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
  const auto& onnx_model_path_name = subgraph.ModelPath();
  // QDQ stripping enabled only for the NPU and experimentally on the GPU
  if ((session_context_.device_type.find("NPU") != std::string::npos ||
- session_context_.device_type.find("GPU") != std::string::npos) &&
+ session_context_.device_type.find("GPU") != std::string::npos) &&
  (enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
  std::unique_ptr<onnxruntime::Model> model;
  Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_.shared_weights);

onnxruntime/core/providers/openvino/backends/basic_backend.cc

Lines changed: 19 additions & 22 deletions
@@ -390,7 +390,6 @@ void BasicBackend::ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>
  }
  }

-
 void BasicBackend::RewindKVCache(size_t index) {
  OVInferRequestPtr infer_request;
  infer_request = inferRequestsQueue_->getIdleRequest();
@@ -402,7 +401,6 @@ void BasicBackend::RewindKVCache(size_t index) {
  // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
  try {
-
  const bool is_cpu = session_context_.device_type.find("CPU") != std::string::npos;
  const bool is_gpu = session_context_.device_type.find("GPU") != std::string::npos;
  const bool is_npu = session_context_.device_type.find("NPU") != std::string::npos;
@@ -411,15 +409,14 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
  ov_shapes = classify_shape_flags(exe_network_.Get());
  // Loop over subgraph original input names to find the correspondent OV input name
  for (const auto& input_info : bindings_->network_inputs_) {
-
  size_t batch_slice_idx = 0;
  auto tensor = context.GetInput(input_info.onnx_index);
  auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
  auto tensor_shape = tensor_info.GetShape();
  auto tensor_data = tensor.GetTensorData<char>();
  if (ov_shapes.has_bounded_dynamic) {
- ov::PartialShape partial_shape = input_info.ov_shape;
- ValidateOrtDimsAgainstPartialShape(tensor_shape, partial_shape);
+ ov::PartialShape partial_shape = input_info.ov_shape;
+ ValidateOrtDimsAgainstPartialShape(tensor_shape, partial_shape);
  }
  ov::Shape input_tensor_shape(tensor_shape.begin(), tensor_shape.end());
  OVTensorPtr tensor_ptr;
@@ -434,7 +431,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
  } else {
  if (is_cpu) {
  tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape, (void*)tensor_data);
- } else { // GPU
+ } else { // GPU
  tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape);
  FillInputBlob(tensor_ptr, batch_slice_idx, input_info.name, context, subgraph_context_);
  }
@@ -445,14 +442,14 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
  ORT_THROW(msg);
  }
  }
- } else { // Other device path
+ } else { // Other device path
  ort_tensor_key_t ort_tensor_key{input_info.name};
  auto it = ort_ov_tensor_map.find(ort_tensor_key);

  if (it == ort_ov_tensor_map.end() || it->second.ort_ptr != tensor.GetTensorRawData()) {
  ov_tensor_data_t ov_tensor_data;
  ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape,
- const_cast<void*>(tensor.GetTensorRawData()));
+ const_cast<void*>(tensor.GetTensorRawData()));
  ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
  ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

@@ -502,7 +499,7 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
  // Wait for Async inference completion
  try {
  infer_request->WaitRequest();
- } catch(const std::runtime_error& e) {
+ } catch (const std::runtime_error& e) {
  infer_request->CancelRequest();
  inferRequestsQueue_->deleteRequest();
  ORT_THROW(log_tag + e.what());
@@ -513,18 +510,18 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
  bool npu = session_context_.device_type.find("NPU") != std::string::npos;
  if (cpu_or_gpu || (npu && (session_context_.enable_causallm || ov_shapes.is_static))) {
  for (const auto& output_info : bindings_->network_outputs_) {
- OVTensorPtr graph_output_blob;
- try {
- graph_output_blob = infer_request->GetTensor(output_info.name);
- } catch (const char* msg) {
- ORT_THROW(msg);
- }
- size_t batch_size = 1;
- Ort::UnownedValue output_tensor =
- GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
- auto mem_info = output_tensor.GetTensorMemoryInfo();
- if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
- return;
+ OVTensorPtr graph_output_blob;
+ try {
+ graph_output_blob = infer_request->GetTensor(output_info.name);
+ } catch (const char* msg) {
+ ORT_THROW(msg);
+ }
+ size_t batch_size = 1;
+ Ort::UnownedValue output_tensor =
+ GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
+ auto mem_info = output_tensor.GetTensorMemoryInfo();
+ if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
+ return;
  } else {
  size_t batch_slice = 0;
  FillOutputBlob(std::move(graph_output_blob), output_tensor, batch_slice);
@@ -586,7 +583,7 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
  } else {
  OVInferRequestPtr infer_request;
  infer_request = inferRequestsQueue_->getIdleRequest();
- if(infer_request == nullptr) {
+ if (infer_request == nullptr) {
  ORT_THROW("OpenVINO Execution Provider :: There are no inference requests");
  LOGS_DEFAULT(FATAL) << log_tag << "Create Infer Requests do not exist";
  return;

onnxruntime/core/providers/openvino/backends/basic_backend.h

Lines changed: 11 additions & 12 deletions
@@ -31,13 +31,13 @@ struct ov_tensor_data_t {
  };

 struct DynamicFlags {
- bool is_static = true;  // default true if no dynamic dims
- bool has_fully_dynamic = false;
- bool has_bounded_dynamic = false;
+ bool is_static = true;  // default true if no dynamic dims
+ bool has_fully_dynamic = false;
+ bool has_bounded_dynamic = false;

- bool is_mixed() const {
- return has_fully_dynamic && has_bounded_dynamic;
- }
+ bool is_mixed() const {
+ return has_fully_dynamic && has_bounded_dynamic;
+ }
  };

 struct OnnxToOvNetworkBindings {
@@ -62,8 +62,8 @@ struct OnnxToOvNetworkBindings {
  // However, these tensors are internally converted to a stateful representation, which removes them.
  // To prevent runtime exceptions, we simply continue processing here.
  if ((onnx_name.empty() || onnx_name == "beam_idx" ||
- onnx_name.find("past_key_values") != std::string::npos ||
- onnx_name.find("present") != std::string::npos) &&
+ onnx_name.find("past_key_values") != std::string::npos ||
+ onnx_name.find("present") != std::string::npos) &&
  session_context.enable_causallm) {
  continue;
  }
@@ -133,12 +133,11 @@ class BasicBackend : public IBackend {
  DynamicFlags ov_shapes;
  };

-
 class InferRequestsQueue {
  public:
  InferRequestsQueue(OVExeNetwork& net, size_t nireq, std::function<void(OVInferRequestPtr)> initializer) {
  OVInferRequestPtr infer_request;
- live_threads=nireq;
+ live_threads = nireq;
  for (size_t id = 0; id < nireq; id++) {
  infer_request = net.CreateInferRequest();
  initializer(infer_request);
@@ -170,7 +169,7 @@ class InferRequestsQueue {

  OVInferRequestPtr getIdleRequest() {
  std::unique_lock<std::mutex> lock(_mutex);
- if(live_threads==0) {
+ if (live_threads == 0) {
  return nullptr;
  }

@@ -182,7 +181,7 @@ class InferRequestsQueue {

  void deleteRequest() {
  std::unique_lock<std::mutex> lock(_mutex);
- live_threads=live_threads-1;
+ live_threads = live_threads - 1;
  }

  private:

onnxruntime/core/providers/openvino/openvino_execution_provider.cc

Lines changed: 3 additions & 3 deletions
@@ -240,9 +240,9 @@ common::Status OpenVINOExecutionProvider::SetEpDynamicOptions(gsl::span<const ch
  if (workload_type != "") {
  LOGS_DEFAULT(VERBOSE) << "SetEpDynamicOptions - modifying: " << key << "/" << value;
  for (auto& backend : backend_managers_) {
- ov::CompiledModel ov_compiled_model = backend.GetOVCompiledModel();
- if(ov_compiled_model) {
- ov_compiled_model.set_property(ov::workload_type(workload_type));
+ ov::CompiledModel ov_compiled_model = backend.GetOVCompiledModel();
+ if (ov_compiled_model) {
+ ov_compiled_model.set_property(ov::workload_type(workload_type));
  } else {
  LOGS_DEFAULT(VERBOSE) << "Model is not compiled in OV as its dynamic";
  ov::AnyMap map;

onnxruntime/core/providers/openvino/ov_allocator.cc

Lines changed: 6 additions & 6 deletions
@@ -34,12 +34,12 @@ void OVRTAllocator::Free(void* p) {
  try {
  ov::Tensor* tensor = nullptr;
  {
- std::lock_guard<std::mutex> lock(mutex_);
- auto it = allocated_.find(p);
- if (it != allocated_.end()) {
- tensor = it->second;
- allocated_.erase(it);
- }
+ std::lock_guard<std::mutex> lock(mutex_);
+ auto it = allocated_.find(p);
+ if (it != allocated_.end()) {
+ tensor = it->second;
+ allocated_.erase(it);
+ }
  }
  if (tensor) {
  delete tensor;

onnxruntime/core/providers/openvino/ov_interface.cc

Lines changed: 2 additions & 2 deletions
@@ -364,7 +364,7 @@ StatefulOVInferRequest::StatefulOVInferRequest(ov::InferRequest infer_request, s
  }

 void StatefulOVInferRequest::FillTensor(const std::string& tensor_name, const ov::element::Type& type,
- const std::vector<size_t>& shape, int32_t fill_value) {
+ const std::vector<size_t>& shape, int32_t fill_value) {
  ov::Tensor tensor = ov::Tensor(type, shape);
  std::fill_n(tensor.data<int32_t>(), tensor.get_size(), fill_value);
  ovInfReq.set_tensor(tensor_name, tensor);
@@ -379,7 +379,7 @@ void StatefulOVInferRequest::CacheTensor(const std::string& tensor_name, std::ve
  }

 void StatefulOVInferRequest::SetTensorFromCache(const std::string& tensor_name,
- const std::vector<int64_t>& cache_data) {
+ const std::vector<int64_t>& cache_data) {
  auto tensor = ovInfReq.get_tensor(tensor_name);
  auto new_shape = tensor.get_shape();
  new_shape[1] = cache_data.size();

onnxruntime/core/providers/openvino/ov_interface.h

Lines changed: 1 addition & 1 deletion
@@ -132,7 +132,7 @@ class StatefulOVInferRequest : public OVInferRequest {
  void Infer() override;
  void RewindKVCache(size_t index) override;
  void FillTensor(const std::string& tensor_name, const ov::element::Type& type,
- const std::vector<size_t>& shape, int32_t fill_value);
+ const std::vector<size_t>& shape, int32_t fill_value);
  void CacheTensor(const std::string& tensor_name, std::vector<int64_t>& cache);
  void SetTensorFromCache(const std::string& tensor_name, const std::vector<int64_t>& cache_data);
  std::optional<ov::Tensor> FindTensor(const std::string& tensor_name);

onnxruntime/python/tools/quantization/matmul_nbits_quantizer.py

Lines changed: 15 additions & 7 deletions
@@ -874,13 +874,18 @@ def quantize_matmul(self, node: NodeProto, graph_stack: list[GraphProto]) -> lis
  scales_tensor = onnx.numpy_helper.from_array(scales, b_tensor.name + "_DQ_scales")

  # if QDQ, CW and SYM enabled, optimize for Intel NPU, tranpose the weight to NHWC format will increase performance
- qdq_opt_for_intel_npu_enabled = self.config.quant_format == QuantFormat.QDQ \
- and self.config.channel_wised_quantize and self.config.is_symmetric
+ qdq_opt_for_intel_npu_enabled = (
+ self.config.quant_format == QuantFormat.QDQ
+ and self.config.channel_wised_quantize
+ and self.config.is_symmetric
+ )
  if qdq_opt_for_intel_npu_enabled:
  rows, cols = b_ndarray.shape
  packed = transpose_packed_int4_matrix(packed, rows, cols)
- scales = scales.reshape((cols, 1)) # (cols, 1)
- b_quant = onnx.helper.make_tensor(b_tensor.name + f"_DQ_Q{bits}", qtype, [cols, rows], packed.tobytes(), True)
+ scales = scales.reshape((cols, 1))  # (cols, 1)
+ b_quant = onnx.helper.make_tensor(
+ b_tensor.name + f"_DQ_Q{bits}", qtype, [cols, rows], packed.tobytes(), True
+ )
  scales_tensor = onnx.numpy_helper.from_array(scales, b_tensor.name + "_DQ_scales")

  for input in b_graph.input:
@@ -924,7 +929,10 @@ def quantize_matmul(self, node: NodeProto, graph_stack: list[GraphProto]) -> lis
  dq_output_names = [b_quant.name + "_output"]
  tp_input_names = [dq_output_names[0]]
  tp_output_names = [dq_output_names[0] + "_transposed"]
- matmul_input_names = [node.input[0], tp_output_names[0] if qdq_opt_for_intel_npu_enabled else dq_output_names[0]]
+ matmul_input_names = [
+ node.input[0],
+ tp_output_names[0] if qdq_opt_for_intel_npu_enabled else dq_output_names[0],
+ ]
  matmul_output_names = [node.output[0]]
  if not self.config.is_symmetric:
  zp_tensor = onnx.helper.make_tensor(
@@ -935,7 +943,7 @@ def quantize_matmul(self, node: NodeProto, graph_stack: list[GraphProto]) -> lis
  rows, cols = b_ndarray.shape
  dq_kwargs = {
  "axis": 1 if qdq_opt_for_intel_npu_enabled else 0,
- "block_size": rows if self.config.channel_wised_quantize else self.config.block_size
+ "block_size": rows if self.config.channel_wised_quantize else self.config.block_size,
  }
  dq_node = onnx.helper.make_node(
  "DequantizeLinear",
@@ -955,7 +963,7 @@ def quantize_matmul(self, node: NodeProto, graph_stack: list[GraphProto]) -> lis
  "Transpose",
  inputs=tp_input_names,
  outputs=tp_output_names,
- perm=[1,0],
+ perm=[1, 0],
  )
  output_nodes.extend([dq_node, tp_node, matmul_node])
  else:

onnxruntime/test/providers/openvino/openvino_ep_context_test.cc

Lines changed: 3 additions & 13 deletions
@@ -25,27 +25,21 @@
 using namespace ONNX_NAMESPACE;
 using namespace onnxruntime::logging;

-
 extern std::unique_ptr<Ort::Env> ort_env;

 class OVEPEPContextTests : public ::testing::Test {
-
-
 };

 namespace onnxruntime {
 namespace test {

-
 // Test if folder path given to ep_context_file_path throws an error
 TEST_F(OVEPEPContextTests, OVEPEPContextFolderPath) {
-
  Ort::SessionOptions sessionOptions;
  std::unordered_map<std::string, std::string> ov_options;

- //The below line could fail the test in non NPU platforms.Commenting it out so that the device used for building OVEP will be used.
- //ov_options["device_type"] = "NPU";
-
+ // The below line could fail the test in non NPU platforms.Commenting it out so that the device used for building OVEP will be used.
+ // ov_options["device_type"] = "NPU";

  const std::unordered_map<std::string, int> domain_to_version = {{"", 13}, {kMSDomain, 1}};

@@ -66,22 +60,18 @@ TEST_F(OVEPEPContextTests, OVEPEPContextFolderPath) {

  const std::string ep_context_file_path = "./ep_context_folder_path/";

-
  sessionOptions.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1");
- sessionOptions.AddConfigEntry(kOrtSessionOptionEpContextFilePath,ep_context_file_path.c_str());
+ sessionOptions.AddConfigEntry(kOrtSessionOptionEpContextFilePath, ep_context_file_path.c_str());
  sessionOptions.AppendExecutionProvider_OpenVINO_V2(ov_options);

-
  try {
  Ort::Session session(*ort_env, model_data_span.data(), model_data_span.size(), sessionOptions);
  FAIL(); // Should not get here!
  } catch (const Ort::Exception& excpt) {
  ASSERT_EQ(excpt.GetOrtErrorCode(), ORT_INVALID_ARGUMENT);
  ASSERT_THAT(excpt.what(), testing::HasSubstr("context_file_path should not point to a folder."));
  }
-
 }

-
 } // namespace test
 } // namespace onnxruntime
