
Commit 032d6db: Fix lint issues
Parent: e0ab94b

File tree: 9 files changed (+62, -68 lines)


onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 2 additions & 2 deletions
@@ -113,7 +113,7 @@ BackendManager::BackendManager(SessionContext& session_context,
  subgraph_context_.has_dynamic_input_shape = true;
  LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
  if (cpu_or_gpu || (npu && session_context_.enable_causallm) &&
- !session_context_.disable_dynamic_shapes) {
+ !session_context_.disable_dynamic_shapes) {
  LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
  << "Creating backend Dynamic Shapes";
  try {
@@ -448,7 +448,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
  const auto& onnx_model_path_name = subgraph.ModelPath();
  // QDQ stripping enabled only for the NPU and experimentally on the GPU
  if ((session_context_.device_type.find("NPU") != std::string::npos ||
- session_context_.device_type.find("GPU") != std::string::npos) &&
+ session_context_.device_type.find("GPU") != std::string::npos) &&
  (enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
  std::unique_ptr<onnxruntime::Model> model;
  Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_.shared_weights);

onnxruntime/core/providers/openvino/backends/basic_backend.cc

Lines changed: 19 additions & 22 deletions
@@ -390,7 +390,6 @@ void BasicBackend::ValidateOrtDimsAgainstPartialShape(const std::vector<int64_t>
  }
  }

-
 void BasicBackend::RewindKVCache(size_t index) {
  OVInferRequestPtr infer_request;
  infer_request = inferRequestsQueue_->getIdleRequest();
@@ -402,7 +401,6 @@ void BasicBackend::RewindKVCache(size_t index) {
  // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
  try {
-
  const bool is_cpu = session_context_.device_type.find("CPU") != std::string::npos;
  const bool is_gpu = session_context_.device_type.find("GPU") != std::string::npos;
  const bool is_npu = session_context_.device_type.find("NPU") != std::string::npos;
@@ -411,15 +409,14 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
  ov_shapes = classify_shape_flags(exe_network_.Get());
  // Loop over subgraph original input names to find the correspondent OV input name
  for (const auto& input_info : bindings_->network_inputs_) {
-
  size_t batch_slice_idx = 0;
  auto tensor = context.GetInput(input_info.onnx_index);
  auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
  auto tensor_shape = tensor_info.GetShape();
  auto tensor_data = tensor.GetTensorData<char>();
  if (ov_shapes.has_bounded_dynamic) {
- ov::PartialShape partial_shape = input_info.ov_shape;
- ValidateOrtDimsAgainstPartialShape(tensor_shape, partial_shape);
+ ov::PartialShape partial_shape = input_info.ov_shape;
+ ValidateOrtDimsAgainstPartialShape(tensor_shape, partial_shape);
  }
  ov::Shape input_tensor_shape(tensor_shape.begin(), tensor_shape.end());
  OVTensorPtr tensor_ptr;
@@ -434,7 +431,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
  } else {
  if (is_cpu) {
  tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape, (void*)tensor_data);
- } else { // GPU
+ } else { // GPU
  tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape);
  FillInputBlob(tensor_ptr, batch_slice_idx, input_info.name, context, subgraph_context_);
  }
@@ -445,14 +442,14 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
  ORT_THROW(msg);
  }
  }
- } else { // Other device path
+ } else { // Other device path
  ort_tensor_key_t ort_tensor_key{input_info.name};
  auto it = ort_ov_tensor_map.find(ort_tensor_key);

  if (it == ort_ov_tensor_map.end() || it->second.ort_ptr != tensor.GetTensorRawData()) {
  ov_tensor_data_t ov_tensor_data;
  ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(input_info.type, input_tensor_shape,
- const_cast<void*>(tensor.GetTensorRawData()));
+ const_cast<void*>(tensor.GetTensorRawData()));
  ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
  ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

@@ -502,7 +499,7 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
  // Wait for Async inference completion
  try {
  infer_request->WaitRequest();
- } catch(const std::runtime_error& e) {
+ } catch (const std::runtime_error& e) {
  infer_request->CancelRequest();
  inferRequestsQueue_->deleteRequest();
  ORT_THROW(log_tag + e.what());
@@ -513,18 +510,18 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
  bool npu = session_context_.device_type.find("NPU") != std::string::npos;
  if (cpu_or_gpu || (npu && (session_context_.enable_causallm || ov_shapes.is_static))) {
  for (const auto& output_info : bindings_->network_outputs_) {
- OVTensorPtr graph_output_blob;
- try {
- graph_output_blob = infer_request->GetTensor(output_info.name);
- } catch (const char* msg) {
- ORT_THROW(msg);
- }
- size_t batch_size = 1;
- Ort::UnownedValue output_tensor =
- GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
- auto mem_info = output_tensor.GetTensorMemoryInfo();
- if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
- return;
+ OVTensorPtr graph_output_blob;
+ try {
+ graph_output_blob = infer_request->GetTensor(output_info.name);
+ } catch (const char* msg) {
+ ORT_THROW(msg);
+ }
+ size_t batch_size = 1;
+ Ort::UnownedValue output_tensor =
+ GetOutputTensor(context, batch_size, infer_request, output_info.name, subgraph_context_.output_names);
+ auto mem_info = output_tensor.GetTensorMemoryInfo();
+ if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
+ return;
  } else {
  size_t batch_slice = 0;
  FillOutputBlob(std::move(graph_output_blob), output_tensor, batch_slice);
@@ -586,7 +583,7 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
  } else {
  OVInferRequestPtr infer_request;
  infer_request = inferRequestsQueue_->getIdleRequest();
- if(infer_request == nullptr) {
+ if (infer_request == nullptr) {
  ORT_THROW("OpenVINO Execution Provider :: There are no inference requests");
  LOGS_DEFAULT(FATAL) << log_tag << "Create Infer Requests do not exist";
  return;

onnxruntime/core/providers/openvino/backends/basic_backend.h

Lines changed: 11 additions & 12 deletions
@@ -31,13 +31,13 @@ struct ov_tensor_data_t {
  };

 struct DynamicFlags {
- bool is_static = true;  // default true if no dynamic dims
- bool has_fully_dynamic = false;
- bool has_bounded_dynamic = false;
+ bool is_static = true;  // default true if no dynamic dims
+ bool has_fully_dynamic = false;
+ bool has_bounded_dynamic = false;

- bool is_mixed() const {
- return has_fully_dynamic && has_bounded_dynamic;
- }
+ bool is_mixed() const {
+ return has_fully_dynamic && has_bounded_dynamic;
+ }
  };

 struct OnnxToOvNetworkBindings {
@@ -62,8 +62,8 @@ struct OnnxToOvNetworkBindings {
  // However, these tensors are internally converted to a stateful representation, which removes them.
  // To prevent runtime exceptions, we simply continue processing here.
  if ((onnx_name.empty() || onnx_name == "beam_idx" ||
- onnx_name.find("past_key_values") != std::string::npos ||
- onnx_name.find("present") != std::string::npos) &&
+ onnx_name.find("past_key_values") != std::string::npos ||
+ onnx_name.find("present") != std::string::npos) &&
  session_context.enable_causallm) {
  continue;
  }
@@ -133,12 +133,11 @@ class BasicBackend : public IBackend {
  DynamicFlags ov_shapes;
  };

-
 class InferRequestsQueue {
  public:
  InferRequestsQueue(OVExeNetwork& net, size_t nireq, std::function<void(OVInferRequestPtr)> initializer) {
  OVInferRequestPtr infer_request;
- live_threads=nireq;
+ live_threads = nireq;
  for (size_t id = 0; id < nireq; id++) {
  infer_request = net.CreateInferRequest();
  initializer(infer_request);
@@ -170,7 +169,7 @@ class InferRequestsQueue {

  OVInferRequestPtr getIdleRequest() {
  std::unique_lock<std::mutex> lock(_mutex);
- if(live_threads==0) {
+ if (live_threads == 0) {
  return nullptr;
  }

@@ -182,7 +181,7 @@ class InferRequestsQueue {

  void deleteRequest() {
  std::unique_lock<std::mutex> lock(_mutex);
- live_threads=live_threads-1;
+ live_threads = live_threads - 1;
  }

  private:

onnxruntime/core/providers/openvino/openvino_execution_provider.cc

Lines changed: 3 additions & 3 deletions
@@ -240,9 +240,9 @@ common::Status OpenVINOExecutionProvider::SetEpDynamicOptions(gsl::span<const ch
  if (workload_type != "") {
  LOGS_DEFAULT(VERBOSE) << "SetEpDynamicOptions - modifying: " << key << "/" << value;
  for (auto& backend : backend_managers_) {
- ov::CompiledModel ov_compiled_model = backend.GetOVCompiledModel();
- if(ov_compiled_model) {
- ov_compiled_model.set_property(ov::workload_type(workload_type));
+ ov::CompiledModel ov_compiled_model = backend.GetOVCompiledModel();
+ if (ov_compiled_model) {
+ ov_compiled_model.set_property(ov::workload_type(workload_type));
  } else {
  LOGS_DEFAULT(VERBOSE) << "Model is not compiled in OV as its dynamic";
  ov::AnyMap map;

onnxruntime/core/providers/openvino/ov_allocator.cc

Lines changed: 6 additions & 6 deletions
@@ -34,12 +34,12 @@ void OVRTAllocator::Free(void* p) {
  try {
  ov::Tensor* tensor = nullptr;
  {
- std::lock_guard<std::mutex> lock(mutex_);
- auto it = allocated_.find(p);
- if (it != allocated_.end()) {
- tensor = it->second;
- allocated_.erase(it);
- }
+ std::lock_guard<std::mutex> lock(mutex_);
+ auto it = allocated_.find(p);
+ if (it != allocated_.end()) {
+ tensor = it->second;
+ allocated_.erase(it);
+ }
  }
  if (tensor) {
  delete tensor;

onnxruntime/core/providers/openvino/ov_interface.cc

Lines changed: 2 additions & 2 deletions
@@ -364,7 +364,7 @@ StatefulOVInferRequest::StatefulOVInferRequest(ov::InferRequest infer_request, s
  }

 void StatefulOVInferRequest::FillTensor(const std::string& tensor_name, const ov::element::Type& type,
- const std::vector<size_t>& shape, int32_t fill_value) {
+ const std::vector<size_t>& shape, int32_t fill_value) {
  ov::Tensor tensor = ov::Tensor(type, shape);
  std::fill_n(tensor.data<int32_t>(), tensor.get_size(), fill_value);
  ovInfReq.set_tensor(tensor_name, tensor);
@@ -379,7 +379,7 @@ void StatefulOVInferRequest::CacheTensor(const std::string& tensor_name, std::ve
  }

 void StatefulOVInferRequest::SetTensorFromCache(const std::string& tensor_name,
- const std::vector<int64_t>& cache_data) {
+ const std::vector<int64_t>& cache_data) {
  auto tensor = ovInfReq.get_tensor(tensor_name);
  auto new_shape = tensor.get_shape();
  new_shape[1] = cache_data.size();

onnxruntime/core/providers/openvino/ov_interface.h

Lines changed: 1 addition & 1 deletion
@@ -132,7 +132,7 @@ class StatefulOVInferRequest : public OVInferRequest {
  void Infer() override;
  void RewindKVCache(size_t index) override;
  void FillTensor(const std::string& tensor_name, const ov::element::Type& type,
- const std::vector<size_t>& shape, int32_t fill_value);
+ const std::vector<size_t>& shape, int32_t fill_value);
  void CacheTensor(const std::string& tensor_name, std::vector<int64_t>& cache);
  void SetTensorFromCache(const std::string& tensor_name, const std::vector<int64_t>& cache_data);
  std::optional<ov::Tensor> FindTensor(const std::string& tensor_name);

onnxruntime/python/tools/quantization/matmul_nbits_quantizer.py

Lines changed: 15 additions & 7 deletions
@@ -874,13 +874,18 @@ def quantize_matmul(self, node: NodeProto, graph_stack: list[GraphProto]) -> lis
  scales_tensor = onnx.numpy_helper.from_array(scales, b_tensor.name + "_DQ_scales")

  # if QDQ, CW and SYM enabled, optimize for Intel NPU, tranpose the weight to NHWC format will increase performance
- qdq_opt_for_intel_npu_enabled = self.config.quant_format == QuantFormat.QDQ \
- and self.config.channel_wised_quantize and self.config.is_symmetric
+ qdq_opt_for_intel_npu_enabled = (
+ self.config.quant_format == QuantFormat.QDQ
+ and self.config.channel_wised_quantize
+ and self.config.is_symmetric
+ )
  if qdq_opt_for_intel_npu_enabled:
  rows, cols = b_ndarray.shape
  packed = transpose_packed_int4_matrix(packed, rows, cols)
- scales = scales.reshape((cols, 1)) # (cols, 1)
- b_quant = onnx.helper.make_tensor(b_tensor.name + f"_DQ_Q{bits}", qtype, [cols, rows], packed.tobytes(), True)
+ scales = scales.reshape((cols, 1))  # (cols, 1)
+ b_quant = onnx.helper.make_tensor(
+ b_tensor.name + f"_DQ_Q{bits}", qtype, [cols, rows], packed.tobytes(), True
+ )
  scales_tensor = onnx.numpy_helper.from_array(scales, b_tensor.name + "_DQ_scales")

  for input in b_graph.input:
@@ -924,7 +929,10 @@ def quantize_matmul(self, node: NodeProto, graph_stack: list[GraphProto]) -> lis
  dq_output_names = [b_quant.name + "_output"]
  tp_input_names = [dq_output_names[0]]
  tp_output_names = [dq_output_names[0] + "_transposed"]
- matmul_input_names = [node.input[0], tp_output_names[0] if qdq_opt_for_intel_npu_enabled else dq_output_names[0]]
+ matmul_input_names = [
+ node.input[0],
+ tp_output_names[0] if qdq_opt_for_intel_npu_enabled else dq_output_names[0],
+ ]
  matmul_output_names = [node.output[0]]
  if not self.config.is_symmetric:
  zp_tensor = onnx.helper.make_tensor(
@@ -935,7 +943,7 @@ def quantize_matmul(self, node: NodeProto, graph_stack: list[GraphProto]) -> lis
  rows, cols = b_ndarray.shape
  dq_kwargs = {
  "axis": 1 if qdq_opt_for_intel_npu_enabled else 0,
- "block_size": rows if self.config.channel_wised_quantize else self.config.block_size
+ "block_size": rows if self.config.channel_wised_quantize else self.config.block_size,
  }
  dq_node = onnx.helper.make_node(
  "DequantizeLinear",
@@ -955,7 +963,7 @@ def quantize_matmul(self, node: NodeProto, graph_stack: list[GraphProto]) -> lis
  "Transpose",
  inputs=tp_input_names,
  outputs=tp_output_names,
- perm=[1,0],
+ perm=[1, 0],
  )
  output_nodes.extend([dq_node, tp_node, matmul_node])
  else:

onnxruntime/test/providers/openvino/openvino_ep_context_test.cc

Lines changed: 3 additions & 13 deletions
@@ -25,27 +25,21 @@
 using namespace ONNX_NAMESPACE;
 using namespace onnxruntime::logging;

-
 extern std::unique_ptr<Ort::Env> ort_env;

 class OVEPEPContextTests : public ::testing::Test {
-
-
 };

 namespace onnxruntime {
 namespace test {

-
 // Test if folder path given to ep_context_file_path throws an error
 TEST_F(OVEPEPContextTests, OVEPEPContextFolderPath) {
-
  Ort::SessionOptions sessionOptions;
  std::unordered_map<std::string, std::string> ov_options;

- //The below line could fail the test in non NPU platforms.Commenting it out so that the device used for building OVEP will be used.
- //ov_options["device_type"] = "NPU";
-
+ // The below line could fail the test in non NPU platforms.Commenting it out so that the device used for building OVEP will be used.
+ // ov_options["device_type"] = "NPU";

  const std::unordered_map<std::string, int> domain_to_version = {{"", 13}, {kMSDomain, 1}};

@@ -66,22 +60,18 @@ TEST_F(OVEPEPContextTests, OVEPEPContextFolderPath) {

  const std::string ep_context_file_path = "./ep_context_folder_path/";

-
  sessionOptions.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1");
- sessionOptions.AddConfigEntry(kOrtSessionOptionEpContextFilePath,ep_context_file_path.c_str());
+ sessionOptions.AddConfigEntry(kOrtSessionOptionEpContextFilePath, ep_context_file_path.c_str());
  sessionOptions.AppendExecutionProvider_OpenVINO_V2(ov_options);

-
  try {
  Ort::Session session(*ort_env, model_data_span.data(), model_data_span.size(), sessionOptions);
  FAIL(); // Should not get here!
  } catch (const Ort::Exception& excpt) {
  ASSERT_EQ(excpt.GetOrtErrorCode(), ORT_INVALID_ARGUMENT);
  ASSERT_THAT(excpt.what(), testing::HasSubstr("context_file_path should not point to a folder."));
  }
-
 }

-
 } // namespace test
 } // namespace onnxruntime
