From c1dce2b10dcbd13bd556a059094cba0d9b877faa Mon Sep 17 00:00:00 2001 From: Martin Date: Thu, 27 Mar 2025 15:23:56 -0500 Subject: [PATCH 1/5] RetryAction compiles --- .../SonicCore/interface/RetryActionBase.h | 26 +++++++++++++++++++ .../interface/RetrySameServerAction.h | 14 ++++++++++ .../SonicCore/src/RetryActionBase.cc | 15 +++++++++++ .../SonicCore/src/RetrySameServerAction.cc | 11 ++++++++ 4 files changed, 66 insertions(+) create mode 100644 HeterogeneousCore/SonicCore/interface/RetryActionBase.h create mode 100644 HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h create mode 100644 HeterogeneousCore/SonicCore/src/RetryActionBase.cc create mode 100644 HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc diff --git a/HeterogeneousCore/SonicCore/interface/RetryActionBase.h b/HeterogeneousCore/SonicCore/interface/RetryActionBase.h new file mode 100644 index 0000000000000..3a95578783b3d --- /dev/null +++ b/HeterogeneousCore/SonicCore/interface/RetryActionBase.h @@ -0,0 +1,26 @@ +#ifndef RETRY_ACTION_BASE_H +#define RETRY_ACTION_BASE_H + +#include "FWCore/PluginManager/interface/PluginFactory.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "HeterogeneousCore/SonicCore/interface/SonicClientBase.h" +#include +#include + +// Base class for retry actions +class RetryActionBase { +public: + RetryActionBase(const edm::ParameterSet& conf, SonicClientBase* client); + virtual ~RetryActionBase() = default; +protected: + virtual void retry() = 0; // Pure virtual function for execution logic + void eval(); // interface for calling evaluate in client + +protected: + SonicClientBase* client_; +}; + +// Define the factory for creating retry actions +using RetryActionFactory = edmplugin::PluginFactory; + +#endif // RETRY_ACTION_BASE_H diff --git a/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h b/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h new file mode 100644 index 0000000000000..cb752262dce28 --- /dev/null +++ b/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h @@ -0,0 +1,14 @@ +#include "HeterogeneousCore/SonicCore/interface/RetryActionBase.h" +#include "HeterogeneousCore/SonicCore/interface/SonicClientBase.h" + +class RetrySameServerAction : public RetryActionBase { +public: + RetrySameServerAction(const edm::ParameterSet& pset, SonicClientBase* client) + : RetryActionBase(pset, client), + allowedTries_(pset.getUntrackedParameter("allowedTries", 0)) {} +protected: + void retry(); + +private: + unsigned allowedTries_,tries_; +}; diff --git a/HeterogeneousCore/SonicCore/src/RetryActionBase.cc b/HeterogeneousCore/SonicCore/src/RetryActionBase.cc new file mode 100644 index 0000000000000..ecdae15543654 --- /dev/null +++ b/HeterogeneousCore/SonicCore/src/RetryActionBase.cc @@ -0,0 +1,15 @@ +#include "HeterogeneousCore/SonicCore/interface/RetryActionBase.h" + +// Constructor implementation +RetryActionBase::RetryActionBase(const edm::ParameterSet& conf,SonicClientBase* client) : client_(client) {} + + +void RetryActionBase::eval() { + if (client_) { + client_->evaluate(); + } else { + edm::LogError("RetryActionBase") << "Client pointer is null, cannot evaluate."; + } +} + +EDM_REGISTER_PLUGINFACTORY(RetryActionFactory, "RetryActionFactory"); diff --git a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc new file mode 100644 index 0000000000000..637c4fe2bbff9 --- /dev/null +++ b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc @@ -0,0 +1,11 @@ +#include "HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h" +#include "HeterogeneousCore/SonicCore/interface/SonicClientBase.h" + +void RetrySameServerAction::retry() { + ++tries_; + //if max retries has not been exceeded, call evaluate again + if (tries_ < allowedTries_) { + eval(); + return; + } +} From 4dde1c61684f0adc50a2aa1cf5ec1ce922820b61 Mon Sep 17 00:00:00 2001 From: Martin Date: Wed, 2 Apr 2025 09:52:35 -0500 Subject: [PATCH 2/5] Include RetryAction in SonicClientBase --- .../SonicCore/interface/RetryActionBase.h | 23 +++++--- .../interface/RetrySameServerAction.h | 12 ++-- .../SonicCore/interface/SonicClientBase.h | 12 ++++ .../SonicCore/src/RetryActionBase.cc | 13 ++--- .../SonicCore/src/RetrySameServerAction.cc | 19 ++++--- .../SonicCore/src/SonicClientBase.cc | 55 ++++++++++++++++--- 6 files changed, 99 insertions(+), 35 deletions(-) diff --git a/HeterogeneousCore/SonicCore/interface/RetryActionBase.h b/HeterogeneousCore/SonicCore/interface/RetryActionBase.h index 3a95578783b3d..4732abc27a38f 100644 --- a/HeterogeneousCore/SonicCore/interface/RetryActionBase.h +++ b/HeterogeneousCore/SonicCore/interface/RetryActionBase.h @@ -10,17 +10,24 @@ // Base class for retry actions class RetryActionBase { public: - RetryActionBase(const edm::ParameterSet& conf, SonicClientBase* client); - virtual ~RetryActionBase() = default; + RetryActionBase(const edm::ParameterSet& conf, SonicClientBase* client); + virtual ~RetryActionBase() = default; + + bool shouldRetry() const { return shouldRetry_; } // Getter for shouldRetry_ + + virtual void retry() = 0; // Pure virtual function for execution logic + virtual void start() = 0; // Pure virtual function for execution logic for initialization + protected: - virtual void retry() = 0; // Pure virtual function for execution logic - void eval(); // interface for calling evaluate in client - + void eval(); // interface for calling evaluate in client + protected: - SonicClientBase* client_; + SonicClientBase* client_; + bool shouldRetry_; // Flag to track if further retries should happen }; // Define the factory for creating retry actions -using RetryActionFactory = edmplugin::PluginFactory; +using RetryActionFactory = + edmplugin::PluginFactory; -#endif // RETRY_ACTION_BASE_H +#endif // RETRY_ACTION_BASE_H diff --git a/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h b/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h index cb752262dce28..cd8cda3a2d435 100644 --- a/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h +++ b/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h @@ -3,12 +3,14 @@ class RetrySameServerAction : public RetryActionBase { public: - RetrySameServerAction(const edm::ParameterSet& pset, SonicClientBase* client) - : RetryActionBase(pset, client), - allowedTries_(pset.getUntrackedParameter("allowedTries", 0)) {} + RetrySameServerAction(const edm::ParameterSet& pset, SonicClientBase* client) + : RetryActionBase(pset, client), allowedTries_(pset.getUntrackedParameter("allowedTries", 0)) {} + + void start() override { tries_=0;}; + protected: - void retry(); + void retry() override; private: - unsigned allowedTries_,tries_; + unsigned allowedTries_, tries_; }; diff --git a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h index 47caaae8b2052..5038f566dbc27 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h +++ b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h @@ -9,12 +9,15 @@ #include "HeterogeneousCore/SonicCore/interface/SonicDispatcherPseudoAsync.h" #include +#include #include #include #include enum class SonicMode { Sync = 1, Async = 2, PseudoAsync = 3 }; +class RetryActionBase; + class SonicClientBase { public: //constructor @@ -57,11 +60,20 @@ class SonicClientBase { unsigned allowedTries_, tries_; std::optional holder_; + // Use a unique_ptr with a custom deleter to avoid incomplete type issues + struct RetryDeleter { + void operator()(RetryActionBase* ptr) const; + }; + + using RetryActionPtr = std::unique_ptr; + std::vector retryActions_; + //for logging/debugging std::string debugName_, clientName_, fullDebugName_; friend class SonicDispatcher; friend class SonicDispatcherPseudoAsync; + friend class RetryActionBase; }; #endif diff --git a/HeterogeneousCore/SonicCore/src/RetryActionBase.cc b/HeterogeneousCore/SonicCore/src/RetryActionBase.cc index ecdae15543654..c595458570b0d 100644 --- a/HeterogeneousCore/SonicCore/src/RetryActionBase.cc +++ b/HeterogeneousCore/SonicCore/src/RetryActionBase.cc @@ -1,15 +1,14 @@ #include "HeterogeneousCore/SonicCore/interface/RetryActionBase.h" // Constructor implementation -RetryActionBase::RetryActionBase(const edm::ParameterSet& conf,SonicClientBase* client) : client_(client) {} - +RetryActionBase::RetryActionBase(const edm::ParameterSet& conf, SonicClientBase* client) : client_(client), shouldRetry_(true) {} void RetryActionBase::eval() { - if (client_) { - client_->evaluate(); - } else { - edm::LogError("RetryActionBase") << "Client pointer is null, cannot evaluate."; - } + if (client_) { + client_->evaluate(); + } else { + edm::LogError("RetryActionBase") << "Client pointer is null, cannot evaluate."; + } } EDM_REGISTER_PLUGINFACTORY(RetryActionFactory, "RetryActionFactory"); diff --git a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc index 637c4fe2bbff9..16959bec547a1 100644 --- a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc +++ b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc @@ -1,11 +1,16 @@ #include "HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h" #include "HeterogeneousCore/SonicCore/interface/SonicClientBase.h" -void RetrySameServerAction::retry() { - ++tries_; - //if max retries has not been exceeded, call evaluate again - if (tries_ < allowedTries_) { - eval(); - return; - } +void RetrySameServerAction::retry() { + ++tries_; + //if max retries has not been exceeded, call evaluate again + if (tries_ < allowedTries_) { + eval(); + return; + }else{ + shouldRetry_ = false; // Flip flag when max retries are reached + edm::LogInfo("RetrySameServerAction") << "Max retry attempts reached. No further retries."; + } } + +DEFINE_EDM_PLUGIN(RetryActionFactory, RetrySameServerAction, "RetrySameServerAction"); diff --git a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc index 745c51f17aaf3..2a4bb73a128b8 100644 --- a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc +++ b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc @@ -1,7 +1,14 @@ #include "HeterogeneousCore/SonicCore/interface/SonicClientBase.h" +#include "HeterogeneousCore/SonicCore/interface/RetryActionBase.h" #include "FWCore/Utilities/interface/Exception.h" #include "FWCore/ParameterSet/interface/allowedValues.h" + +// Custom deleter implementation +void SonicClientBase::RetryDeleter::operator()(RetryActionBase* ptr) const { + delete ptr; +} + SonicClientBase::SonicClientBase(const edm::ParameterSet& params, const std::string& debugName, const std::string& clientName) @@ -12,6 +19,18 @@ SonicClientBase::SonicClientBase(const edm::ParameterSet& params, if (!clientName_.empty()) fullDebugName_ += ":" + clientName_; + std::vector retryPSetList = params.getParameter>("Retry"); + + for (const auto& retryPSet : retryPSetList) { + std::string actionType = retryPSet.getParameter("retryType"); + + auto retryAction = RetryActionFactory::get()->create(actionType, retryPSet, this); + if (retryAction) { + //Convert to RetryActionPtr Type from raw pointer of retryAction + retryActions_.emplace_back(RetryActionPtr(retryAction.release())); + } + } + std::string modeName(params.getParameter("mode")); if (modeName == "Sync") setMode(SonicMode::Sync); @@ -40,19 +59,39 @@ void SonicClientBase::start(edm::WaitingTaskWithArenaHolder holder) { holder_ = std::move(holder); } -void SonicClientBase::start() { tries_ = 0; } +void SonicClientBase::start() { + tries_ = 0; + // initialize all actions + for (const auto& action : retryActions_) { + action->start(); + } +} void SonicClientBase::finish(bool success, std::exception_ptr eptr) { //retries are only allowed if no exception was raised if (!success and !eptr) { - ++tries_; - //if max retries has not been exceeded, call evaluate again - if (tries_ < allowedTries_) { - evaluate(); - //avoid calling doneWaiting() twice - return; + //++tries_; + ////if max retries has not been exceeded, call evaluate again + //if (tries_ < allowedTries_) { + // evaluate(); + // //avoid calling doneWaiting() twice + // return; + //} + + // Check if any retry actions are still valid + bool anyRetryAllowed = false; + for (const auto& action : retryActions_) { + if (action->shouldRetry()) { + action->retry(); // Call retry only if shouldRetry_ is true + return; + } + } + // If no actions allow retries, stop retrying + if (!anyRetryAllowed) { + edm::LogInfo("SonicClientBase") << "No retry actions available. Stopping retries."; + return; } - //prepare an exception if exceeded + //prepare an exception if no more retries left else { edm::Exception ex(edm::errors::ExternalFailure); ex << "SonicCallFailed: call failed after max " << tries_ << " tries"; From 5b093e4f8d610955dc4876b0efbc729b5d6111ae Mon Sep 17 00:00:00 2001 From: Martin Date: Mon, 7 Apr 2025 08:55:41 -0500 Subject: [PATCH 3/5] Update PR comments --- .../SonicCore/interface/RetryActionBase.h | 8 +- .../interface/RetrySameServerAction.h | 2 +- .../SonicCore/interface/SonicClientBase.h | 4 +- .../SonicCore/src/RetryActionBase.cc | 3 +- .../SonicCore/src/RetrySameServerAction.cc | 6 +- .../SonicCore/src/SonicClientBase.cc | 82 +++++++++---------- .../SonicCore/test/DummyClient.h | 2 +- .../SonicCore/test/sonicTest_cfg.py | 44 ++++++++-- 8 files changed, 91 insertions(+), 60 deletions(-) diff --git a/HeterogeneousCore/SonicCore/interface/RetryActionBase.h b/HeterogeneousCore/SonicCore/interface/RetryActionBase.h index 4732abc27a38f..d81183df39a47 100644 --- a/HeterogeneousCore/SonicCore/interface/RetryActionBase.h +++ b/HeterogeneousCore/SonicCore/interface/RetryActionBase.h @@ -1,5 +1,5 @@ -#ifndef RETRY_ACTION_BASE_H -#define RETRY_ACTION_BASE_H +#ifndef HeterogeneousCore_SonicCore_RetryActionBase +#define HeterogeneousCore_SonicCore_RetryActionBase #include "FWCore/PluginManager/interface/PluginFactory.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" @@ -19,7 +19,7 @@ class RetryActionBase { virtual void start() = 0; // Pure virtual function for execution logic for initialization protected: - void eval(); // interface for calling evaluate in client + void eval(); // interface for calling evaluate in client protected: SonicClientBase* client_; @@ -30,4 +30,4 @@ class RetryActionBase { using RetryActionFactory = edmplugin::PluginFactory; -#endif // RETRY_ACTION_BASE_H +#endif diff --git a/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h b/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h index cd8cda3a2d435..8ecce2a170847 100644 --- a/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h +++ b/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h @@ -6,7 +6,7 @@ class RetrySameServerAction : public RetryActionBase { RetrySameServerAction(const edm::ParameterSet& pset, SonicClientBase* client) : RetryActionBase(pset, client), allowedTries_(pset.getUntrackedParameter("allowedTries", 0)) {} - void start() override { tries_=0;}; + void start() override { tries_ = 0; }; protected: void retry() override; diff --git a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h index 5038f566dbc27..45a089701ed12 100644 --- a/HeterogeneousCore/SonicCore/interface/SonicClientBase.h +++ b/HeterogeneousCore/SonicCore/interface/SonicClientBase.h @@ -57,12 +57,12 @@ class SonicClientBase { SonicMode mode_; bool verbose_; std::unique_ptr dispatcher_; - unsigned allowedTries_, tries_; + unsigned totalTries_; std::optional holder_; // Use a unique_ptr with a custom deleter to avoid incomplete type issues struct RetryDeleter { - void operator()(RetryActionBase* ptr) const; + void operator()(RetryActionBase* ptr) const; }; using RetryActionPtr = std::unique_ptr; diff --git a/HeterogeneousCore/SonicCore/src/RetryActionBase.cc b/HeterogeneousCore/SonicCore/src/RetryActionBase.cc index c595458570b0d..41b9a6186da2b 100644 --- a/HeterogeneousCore/SonicCore/src/RetryActionBase.cc +++ b/HeterogeneousCore/SonicCore/src/RetryActionBase.cc @@ -1,7 +1,8 @@ #include "HeterogeneousCore/SonicCore/interface/RetryActionBase.h" // Constructor implementation -RetryActionBase::RetryActionBase(const edm::ParameterSet& conf, SonicClientBase* client) : client_(client), shouldRetry_(true) {} +RetryActionBase::RetryActionBase(const edm::ParameterSet& conf, SonicClientBase* client) + : client_(client), shouldRetry_(true) {} void RetryActionBase::eval() { if (client_) { diff --git a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc index 16959bec547a1..b5a24af935596 100644 --- a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc +++ b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc @@ -7,9 +7,9 @@ void RetrySameServerAction::retry() { if (tries_ < allowedTries_) { eval(); return; - }else{ - shouldRetry_ = false; // Flip flag when max retries are reached - edm::LogInfo("RetrySameServerAction") << "Max retry attempts reached. No further retries."; + } else { + shouldRetry_ = false; // Flip flag when max retries are reached + edm::LogInfo("RetrySameServerAction") << "Max retry attempts reached. No further retries."; } } diff --git a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc index 2a4bb73a128b8..514a680b2518b 100644 --- a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc +++ b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc @@ -3,35 +3,31 @@ #include "FWCore/Utilities/interface/Exception.h" #include "FWCore/ParameterSet/interface/allowedValues.h" - // Custom deleter implementation -void SonicClientBase::RetryDeleter::operator()(RetryActionBase* ptr) const { - delete ptr; -} +void SonicClientBase::RetryDeleter::operator()(RetryActionBase* ptr) const { delete ptr; } SonicClientBase::SonicClientBase(const edm::ParameterSet& params, const std::string& debugName, const std::string& clientName) - : allowedTries_(params.getUntrackedParameter("allowedTries", 0)), - debugName_(debugName), - clientName_(clientName), - fullDebugName_(debugName_) { + : debugName_(debugName), clientName_(clientName), fullDebugName_(debugName_) { if (!clientName_.empty()) fullDebugName_ += ":" + clientName_; - std::vector retryPSetList = params.getParameter>("Retry"); + const auto& retryPSetList = params.getParameter>("Retry"); + std::string modeName(params.getParameter("mode")); for (const auto& retryPSet : retryPSetList) { - std::string actionType = retryPSet.getParameter("retryType"); + const std::string& actionType = retryPSet.getParameter("retryType"); - auto retryAction = RetryActionFactory::get()->create(actionType, retryPSet, this); - if (retryAction) { - //Convert to RetryActionPtr Type from raw pointer of retryAction - retryActions_.emplace_back(RetryActionPtr(retryAction.release())); - } + auto retryAction = RetryActionFactory::get()->create(actionType, retryPSet, this); + if (retryAction) { + //Convert to RetryActionPtr Type from raw pointer of retryAction + retryActions_.emplace_back(RetryActionPtr(retryAction.release())); + } else { + throw cms::Exception("Configuration") << "Unknown Retry type" << actionType << " for SonicClient: " << modeName; + } } - std::string modeName(params.getParameter("mode")); if (modeName == "Sync") setMode(SonicMode::Sync); else if (modeName == "Async") @@ -59,42 +55,32 @@ void SonicClientBase::start(edm::WaitingTaskWithArenaHolder holder) { holder_ = std::move(holder); } -void SonicClientBase::start() { - tries_ = 0; - // initialize all actions - for (const auto& action : retryActions_) { - action->start(); - } +void SonicClientBase::start() { + totalTries_ = 0; + // initialize all actions + for (const auto& action : retryActions_) { + action->start(); + } } void SonicClientBase::finish(bool success, std::exception_ptr eptr) { //retries are only allowed if no exception was raised if (!success and !eptr) { - //++tries_; - ////if max retries has not been exceeded, call evaluate again - //if (tries_ < allowedTries_) { - // evaluate(); - // //avoid calling doneWaiting() twice - // return; - //} - + ++totalTries_; // Check if any retry actions are still valid bool anyRetryAllowed = false; for (const auto& action : retryActions_) { - if (action->shouldRetry()) { - action->retry(); // Call retry only if shouldRetry_ is true - return; - } - } - // If no actions allow retries, stop retrying - if (!anyRetryAllowed) { - edm::LogInfo("SonicClientBase") << "No retry actions available. Stopping retries."; + if (action->shouldRetry()) { + action->retry(); // Call retry only if shouldRetry_ is true return; + } } //prepare an exception if no more retries left - else { + if (!anyRetryAllowed) { + edm::LogInfo("SonicClientBase") << "SonicCallFailed: call failed, no retry actions available after " + << totalTries_ << " tries."; edm::Exception ex(edm::errors::ExternalFailure); - ex << "SonicCallFailed: call failed after max " << tries_ << " tries"; + ex << "SonicCallFailed: call failed, no retry actions available after " << totalTries_ << " tries."; eptr = make_exception_ptr(ex); } } @@ -113,7 +99,19 @@ void SonicClientBase::fillBasePSetDescription(edm::ParameterSetDescription& desc //restrict allowed values desc.ifValue(edm::ParameterDescription("mode", "PseudoAsync", true), edm::allowedValues("Sync", "Async", "PseudoAsync")); - if (allowRetry) - desc.addUntracked("allowedTries", 0); + if (allowRetry) { + // Defines the structure of each entry in the VPSet + edm::ParameterSetDescription retryDesc; + retryDesc.add("retryType", "RetrySameServerAction"); + + // Define a default retry action + edm::ParameterSet defaultRetry; + defaultRetry.addParameter("retryType", "RetrySameServerAction"); + defaultRetry.addUntrackedParameter("allowedTries", 0); + + // Add the VPSet with the default retry action + desc.addVPSet("Retry", retryDesc, {defaultRetry}); + } + desc.add("sonicClientBase", desc); desc.addUntracked("verbose", false); } diff --git a/HeterogeneousCore/SonicCore/test/DummyClient.h b/HeterogeneousCore/SonicCore/test/DummyClient.h index ccef888ad9f7d..6504843926c0a 100644 --- a/HeterogeneousCore/SonicCore/test/DummyClient.h +++ b/HeterogeneousCore/SonicCore/test/DummyClient.h @@ -36,7 +36,7 @@ class DummyClient : public SonicClient { this->output_ = this->input_ * factor_; //simulate a failure - if (this->tries_ < fails_) + if (this->totalTries_ < fails_) this->finish(false); else this->finish(true); diff --git a/HeterogeneousCore/SonicCore/test/sonicTest_cfg.py b/HeterogeneousCore/SonicCore/test/sonicTest_cfg.py index 2cc429138b85c..43a183372dc33 100644 --- a/HeterogeneousCore/SonicCore/test/sonicTest_cfg.py +++ b/HeterogeneousCore/SonicCore/test/sonicTest_cfg.py @@ -26,8 +26,13 @@ mode = cms.string("Sync"), factor = cms.int32(-1), wait = cms.int32(10), - allowedTries = cms.untracked.uint32(0), fails = cms.uint32(0), + Retry = cms.VPSet( + cms.PSet( + retryType = cms.string('RetrySameServerAction'), + allowedTries = cms.untracked.uint32(0) + ) + ) ), ) @@ -37,8 +42,14 @@ mode = cms.string("PseudoAsync"), factor = cms.int32(2), wait = cms.int32(10), - allowedTries = cms.untracked.uint32(0), fails = cms.uint32(0), + Retry = cms.VPSet( + cms.PSet( + retryType = cms.string('RetrySameServerAction'), + allowedTries = cms.untracked.uint32(0) + ) + ) + ), ) @@ -48,32 +59,53 @@ mode = cms.string("Async"), factor = cms.int32(5), wait = cms.int32(10), - allowedTries = cms.untracked.uint32(0), fails = cms.uint32(0), + Retry = cms.VPSet( + cms.PSet( + retryType = cms.string('RetrySameServerAction'), + allowedTries = cms.untracked.uint32(0) + ) + ) ), ) process.dummySyncRetry = process.dummySync.clone( Client = dict( wait = 2, - allowedTries = 2, fails = 1, + Retry = cms.VPSet( + cms.PSet( + retryType = cms.string('RetrySameServerAction'), + allowedTries = cms.untracked.uint32(2) + ) + ) + ) ) process.dummyPseudoAsyncRetry = process.dummyPseudoAsync.clone( Client = dict( wait = 2, - allowedTries = 2, fails = 1, + Retry = cms.VPSet( + cms.PSet( + retryType = cms.string('RetrySameServerAction'), + allowedTries = cms.untracked.uint32(2) + ) + ) ) ) process.dummyAsyncRetry = process.dummyAsync.clone( Client = dict( wait = 2, - allowedTries = 2, fails = 1, + Retry = cms.VPSet( + cms.PSet( + allowedTries = cms.untracked.uint32(2), + retryType = cms.string('RetrySameServerAction') + ) + ) ) ) From c062112a0673f1fe3c847e6919cce900929bdc59 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 11 Apr 2025 11:43:38 -0500 Subject: [PATCH 4/5] PR comments, fix fillDescriptions --- .../SonicCore/interface/RetryActionBase.h | 2 ++ .../SonicCore/src/RetrySameServerAction.cc | 2 +- .../SonicCore/src/SonicClientBase.cc | 17 +++++++---------- .../SonicCore/test/sonicTest_cfg.py | 1 - 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/HeterogeneousCore/SonicCore/interface/RetryActionBase.h b/HeterogeneousCore/SonicCore/interface/RetryActionBase.h index d81183df39a47..e3fc0bbb8af9a 100644 --- a/HeterogeneousCore/SonicCore/interface/RetryActionBase.h +++ b/HeterogeneousCore/SonicCore/interface/RetryActionBase.h @@ -31,3 +31,5 @@ using RetryActionFactory = edmplugin::PluginFactory; #endif + +#define DEFINE_RETRY_ACTION(type) DEFINE_EDM_PLUGIN(RetryActionFactory, type, #type); diff --git a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc index b5a24af935596..31c4fec227500 100644 --- a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc +++ b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc @@ -13,4 +13,4 @@ void RetrySameServerAction::retry() { } } -DEFINE_EDM_PLUGIN(RetryActionFactory, RetrySameServerAction, "RetrySameServerAction"); +DEFINE_RETRY_ACTION(RetrySameServerAction) diff --git a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc index 514a680b2518b..9949d9d1f2ea2 100644 --- a/HeterogeneousCore/SonicCore/src/SonicClientBase.cc +++ b/HeterogeneousCore/SonicCore/src/SonicClientBase.cc @@ -24,7 +24,7 @@ SonicClientBase::SonicClientBase(const edm::ParameterSet& params, //Convert to RetryActionPtr Type from raw pointer of retryAction retryActions_.emplace_back(RetryActionPtr(retryAction.release())); } else { - throw cms::Exception("Configuration") << "Unknown Retry type" << actionType << " for SonicClient: " << modeName; + throw cms::Exception("Configuration") << "Unknown Retry type " << actionType << " for SonicClient: " << modeName; } } @@ -67,8 +67,6 @@ void SonicClientBase::finish(bool success, std::exception_ptr eptr) { //retries are only allowed if no exception was raised if (!success and !eptr) { ++totalTries_; - // Check if any retry actions are still valid - bool anyRetryAllowed = false; for (const auto& action : retryActions_) { if (action->shouldRetry()) { action->retry(); // Call retry only if shouldRetry_ is true @@ -76,13 +74,11 @@ void SonicClientBase::finish(bool success, std::exception_ptr eptr) { } } //prepare an exception if no more retries left - if (!anyRetryAllowed) { - edm::LogInfo("SonicClientBase") << "SonicCallFailed: call failed, no retry actions available after " - << totalTries_ << " tries."; - edm::Exception ex(edm::errors::ExternalFailure); - ex << "SonicCallFailed: call failed, no retry actions available after " << totalTries_ << " tries."; - eptr = make_exception_ptr(ex); - } + edm::LogInfo("SonicClientBase") << "SonicCallFailed: call failed, no retry actions available after " << totalTries_ + << " tries."; + edm::Exception ex(edm::errors::ExternalFailure); + ex << "SonicCallFailed: call failed, no retry actions available after " << totalTries_ << " tries."; + eptr = make_exception_ptr(ex); } if (holder_) { holder_->doneWaiting(eptr); @@ -103,6 +99,7 @@ void SonicClientBase::fillBasePSetDescription(edm::ParameterSetDescription& desc // Defines the structure of each entry in the VPSet edm::ParameterSetDescription retryDesc; retryDesc.add("retryType", "RetrySameServerAction"); + retryDesc.addUntracked("allowedTries", 0); // Define a default retry action edm::ParameterSet defaultRetry; diff --git a/HeterogeneousCore/SonicCore/test/sonicTest_cfg.py b/HeterogeneousCore/SonicCore/test/sonicTest_cfg.py index 43a183372dc33..bf7b44cb01519 100644 --- a/HeterogeneousCore/SonicCore/test/sonicTest_cfg.py +++ b/HeterogeneousCore/SonicCore/test/sonicTest_cfg.py @@ -19,7 +19,6 @@ process.options.numberOfThreads = 2 process.options.numberOfStreams = 0 - process.dummySync = _moduleClass(_moduleName, input = cms.int32(1), Client = cms.PSet( From ca570d7399b56e07fc538842568ffd8280216500 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 11 Apr 2025 15:47:34 -0500 Subject: [PATCH 5/5] Move RetrySameServerAction to plugins. SonicTriton test works. --- HeterogeneousCore/SonicCore/BuildFile.xml | 3 ++- .../SonicCore/plugins/BuildFile.xml | 6 ++++++ .../RetrySameServerAction.cc} | 14 ++++++++++++++ .../SonicCore/src/RetrySameServerAction.cc | 16 ---------------- .../SonicTriton/src/TritonClient.cc | 2 +- .../SonicTriton/test/tritonTest_cfg.py | 7 ++++++- 6 files changed, 29 insertions(+), 19 deletions(-) create mode 100644 HeterogeneousCore/SonicCore/plugins/BuildFile.xml rename HeterogeneousCore/SonicCore/{interface/RetrySameServerAction.h => plugins/RetrySameServerAction.cc} (56%) delete mode 100644 HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc diff --git a/HeterogeneousCore/SonicCore/BuildFile.xml b/HeterogeneousCore/SonicCore/BuildFile.xml index b0d5e2a08b98f..9796c4363c612 100644 --- a/HeterogeneousCore/SonicCore/BuildFile.xml +++ b/HeterogeneousCore/SonicCore/BuildFile.xml @@ -2,7 +2,8 @@ + - +i diff --git a/HeterogeneousCore/SonicCore/plugins/BuildFile.xml b/HeterogeneousCore/SonicCore/plugins/BuildFile.xml new file mode 100644 index 0000000000000..0ecf2187a0f82 --- /dev/null +++ b/HeterogeneousCore/SonicCore/plugins/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h b/HeterogeneousCore/SonicCore/plugins/RetrySameServerAction.cc similarity index 56% rename from HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h rename to HeterogeneousCore/SonicCore/plugins/RetrySameServerAction.cc index 8ecce2a170847..9877013b93d5b 100644 --- a/HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h +++ b/HeterogeneousCore/SonicCore/plugins/RetrySameServerAction.cc @@ -14,3 +14,17 @@ class RetrySameServerAction : public RetryActionBase { private: unsigned allowedTries_, tries_; }; + +void RetrySameServerAction::retry() { + ++tries_; + //if max retries has not been exceeded, call evaluate again + if (tries_ < allowedTries_) { + eval(); + return; + } else { + shouldRetry_ = false; // Flip flag when max retries are reached + edm::LogInfo("RetrySameServerAction") << "Max retry attempts reached. No further retries."; + } +} + +DEFINE_RETRY_ACTION(RetrySameServerAction) diff --git a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc b/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc deleted file mode 100644 index 31c4fec227500..0000000000000 --- a/HeterogeneousCore/SonicCore/src/RetrySameServerAction.cc +++ /dev/null @@ -1,16 +0,0 @@ -#include "HeterogeneousCore/SonicCore/interface/RetrySameServerAction.h" -#include "HeterogeneousCore/SonicCore/interface/SonicClientBase.h" - -void RetrySameServerAction::retry() { - ++tries_; - //if max retries has not been exceeded, call evaluate again - if (tries_ < allowedTries_) { - eval(); - return; - } else { - shouldRetry_ = false; // Flip flag when max retries are reached - edm::LogInfo("RetrySameServerAction") << "Max retry attempts reached. No further retries."; - } -} - -DEFINE_RETRY_ACTION(RetrySameServerAction) diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index ddcdff83448d0..729b6b74ca8dc 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -369,7 +369,7 @@ void TritonClient::getResults(const std::vector //default case for sync and pseudo async void TritonClient::evaluate() { //undo previous signal from TritonException - if (tries_ > 0) { + if (totalTries_ > 0) { // If we are retrying then the evaluate method is called outside the frameworks TBB thread pool. // So we need to setup the service token for the current thread to access the service registry. edm::ServiceRegistry::Operate op(token_); diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 9cede0e496706..f27d7711665af 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -123,9 +123,14 @@ modelVersion = cms.string(""), modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(model)), verbose = cms.untracked.bool(options.verbose or options.verboseClient), - allowedTries = cms.untracked.uint32(options.tries), useSharedMemory = cms.untracked.bool(not options.noShm), compression = cms.untracked.string(options.compression), + Retry = cms.VPSet( + cms.PSet( + retryType = cms.string('RetrySameServerAction'), + allowedTries = cms.untracked.uint32(options.tries) + ) + ) ) ) )