Skip to content

Commit 2e4d541

Browse files
Enable adaptive stripping and eliminate dependency of weight sharing feature on OVEP qdq stripping (#629)
* Eliminate dependency of weight sharing on the OVEP QDQ stripping pass
* Fix QDQ NodeUnit issue
* Enable compiler stripping
* Enable adaptive stripping: clean up code
* Fix backward compatibility issue
* Add logs to identify which stripping is enabled
* Address PR review comments
* Fix unused variable error
* Resolve unused variable issue
* Fix CI issues
1 parent 80dfee9 commit 2e4d541

File tree

5 files changed

+57
-13
lines changed

5 files changed

+57
-13
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "core/providers/openvino/ibackend.h"
2020
#include "core/providers/openvino/backend_utils.h"
2121
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
22+
#include "core/providers/openvino/ov_interface.h"
2223

2324
namespace onnxruntime {
2425
namespace openvino_ep {
@@ -359,22 +360,37 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
359360
}
360361
};
361362

363+
[[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
364+
[[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
365+
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
366+
if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
367+
if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
368+
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: Compiler QDQ optimization pass is enabled";
369+
OVCore::Get()->core.set_property("NPU", {ov::intel_npu::qdq_optimization(true)});
370+
// disabling OVEP qdq stripping
371+
// at this stage the provider option "enable_qdq_optimizer" is still true, but OVEP stripping is disabled (false)
372+
// as compiler stripping is enabled
373+
enable_ovep_qdq_optimizer = false;
374+
} else {
375+
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: OVEP QDQ optimization pass is enabled";
376+
}
377+
}
378+
#endif
379+
362380
const auto& onnx_model_path_name = subgraph.ModelPath();
363381
// QDQ stripping enabled only for the NPU
364382
if (session_context_.device_type.find("NPU") != std::string::npos &&
365-
session_context_.enable_qdq_optimizer &&
366-
IsQDQGraph(subgraph)) {
367-
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1";
383+
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
368384
std::unique_ptr<onnxruntime::Model> model;
369-
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights);
385+
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights, enable_ovep_qdq_optimizer);
370386
auto model_proto = model->ToProto();
371387
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
372388
print_model_proto_duration();
373389
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
374390
ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
375391
return model_proto;
376392
} else {
377-
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 0";
393+
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
378394
auto model = subgraph.CreateModel(logger);
379395
auto model_proto = model->ToProto();
380396
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);

onnxruntime/core/providers/openvino/ov_interface.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,17 @@ void printDebugInfo(const ov::CompiledModel& obj) {
4646
}
4747
#endif
4848

49+
// Reports whether `property` is listed among the supported properties of `device_type`: true if listed, false if not, std::nullopt if the query throws.
50+
std::optional<bool> queryOVProperty(const std::string& property, const std::string& device_type) {
51+
try {
52+
// Fetch the device's list of supported property names (not the value of `property` itself)
53+
auto supported_properties = OVCore::Get()->core.get_property(device_type, ov::supported_properties);
54+
return std::find(supported_properties.begin(), supported_properties.end(), property) != supported_properties.end();
55+
} catch (const std::exception&) {
56+
return std::nullopt; // The supported-properties query itself threw; an unlisted property yields false above, not nullopt
57+
}
58+
}
59+
4960
std::shared_ptr<OVNetwork> OVCore::ReadModel(std::string&& model, const std::string& model_path) {
5061
try {
5162
std::istringstream modelStringStream(std::move(model));

onnxruntime/core/providers/openvino/ov_interface.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <fstream>
99
#include <sstream>
1010
#include <utility>
11+
#include <optional>
1112

1213
#include "openvino/openvino.hpp"
1314
#include "openvino/runtime/intel_npu/properties.hpp"
@@ -37,6 +38,8 @@ typedef ov::intel_gpu::ocl::ClContext* OVRemoteContextPtr;
3738
typedef ov::RemoteContext OVRemoteContext;
3839
#endif
3940

41+
std::optional<bool> queryOVProperty(const std::string& property, const std::string& device_type);
42+
4043
template <typename T>
4144
class WeakSingleton {
4245
public:

onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ static bool CheckDQRuleSet(const NodeUnit& node_unit,
341341
}
342342
}
343343

344+
// This checks whether a QLinear node feeds into an output of the src graph that expects quantized output
344345
static bool CheckQFeedsIntoQuantizedOutput(const NodeUnit& node_unit,
345346
const std::unordered_map<std::string, std::string> graph_op_data_type) {
346347
auto op_of_quantized_layer = node_unit.Outputs();
@@ -447,9 +448,17 @@ static bool HandleDoubleQDQ(onnxruntime::Graph& dst_graph, const onnxruntime::Gr
447448
static void AddStandaloneNodeUnit(onnxruntime::Graph& dst_graph, const onnxruntime::GraphViewer& src_graph,
448449
const NodeUnit& node_unit,
449450
std::set<std::string>& initializers_to_keep,
450-
const logging::Logger& /* logger */) {
451+
const logging::Logger& /* logger */,
452+
bool IsWeightSharingWithoutOVEPQDQStripping) {
451453
assert(node_unit.UnitType() == NodeUnit::Type::SingleNode);
452454

455+
// This is the scenario where weight sharing (WAI) is enabled and OVEP QDQ stripping is disabled:
456+
// do not strip off any Q or DQ node
457+
if (IsWeightSharingWithoutOVEPQDQStripping) {
458+
AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
459+
return;
460+
}
461+
453462
if (HandleDoubleQDQ(dst_graph, src_graph, node_unit, initializers_to_keep)) return;
454463

455464
auto add_identity_op = [&](bool duplicate_dq) {
@@ -511,7 +520,8 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
511520
const onnxruntime::GraphViewer& src_graph,
512521
const NodeUnit& node_unit,
513522
std::set<std::string>& initializers_to_keep,
514-
const logging::Logger& /* logger */) {
523+
const logging::Logger& /* logger */,
524+
bool IsWeightSharingWithoutOVEPQDQStripping) {
515525
assert(node_unit.UnitType() == NodeUnit::Type::QDQGroup);
516526

517527
// Collect inputs coming into the node unit.
@@ -529,7 +539,7 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
529539
SkipReason reason = SkipReason::Other;
530540
bool keep_dq = CheckDQRuleSet(node_unit, dq_node, src_graph, reason);
531541

532-
if (keep_dq) {
542+
if (IsWeightSharingWithoutOVEPQDQStripping || keep_dq) {
533543
AddNode(initializers_to_keep, src_graph, dst_graph, *dq_node);
534544
dq_node_args_to_keep.insert({input_defs.at(0)->Name(),
535545
&dst_graph.GetOrCreateNodeArg(dq_node->OutputDefs().at(0)->Name(),
@@ -597,7 +607,7 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
597607

598608
bool keep_q = CheckQRuleSet(node_unit, q_node, src_graph, reason);
599609

600-
if (keep_q) {
610+
if (IsWeightSharingWithoutOVEPQDQStripping || keep_q) {
601611
AddNode(initializers_to_keep, src_graph, dst_graph, *q_node);
602612
// if keep_q, then output defs of the target node doesn't change
603613
output_args.push_back(&dst_graph.GetOrCreateNodeArg(target_node.OutputDefs().at(i)->Name(),
@@ -675,7 +685,8 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
675685
const logging::Logger& logger,
676686
bool enable_ovep_weight_sharing,
677687
/*out*/ std::unique_ptr<onnxruntime::Model>& model,
678-
/*out*/ sw& shared_weights) {
688+
/*out*/ sw& shared_weights,
689+
bool enable_ovep_qdq_optimizer) {
679690
// NOTE: This function is a re-implementation of GraphViewerToProto() in core/graph/graph_proto_serializer.cc
680691
// with the following differences:
681692
// - Uses onnxruntime::Graph APIs instead of onnx::GraphProto APIs.
@@ -766,10 +777,12 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
766777
continue; // Already handled this node unit
767778
}
768779

780+
bool IsWeightSharingWithoutOVEPQDQStripping = enable_ovep_weight_sharing && !enable_ovep_qdq_optimizer;
781+
769782
if (node_unit->UnitType() == NodeUnit::Type::SingleNode) {
770-
AddStandaloneNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger);
783+
AddStandaloneNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger, IsWeightSharingWithoutOVEPQDQStripping);
771784
} else {
772-
AddQDQNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger);
785+
AddQDQNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger, IsWeightSharingWithoutOVEPQDQStripping);
773786
}
774787

775788
seen_node_units.insert(node_unit);

onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
1717
const logging::Logger& logger,
1818
bool enable_ovep_weight_sharing,
1919
/*out*/ std::unique_ptr<onnxruntime::Model>& model,
20-
/*out*/ sw& shared_weights);
20+
/*out*/ sw& shared_weights,
21+
bool enable_ovep_qdq_optimizer);
2122

2223
bool dumpMetaDataMapToBinary(const sw::Metadata::Map& shared_weights, const std::string& filename);
2324
} // namespace openvino_ep

0 commit comments

Comments
 (0)