Skip to content

Commit 8dc7502

Browse files
committed
Fix hang when stopping a program with a broken serial with a full queue.
1 parent f27f1e9 commit 8dc7502

File tree

4 files changed

+31
-66
lines changed

4 files changed

+31
-66
lines changed

ClientSource/Connection/BotBase.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ class BotBaseController{
4343

4444
// Stop all pending commands. This wipes the command queue on both sides
4545
// and stops any currently executing command.
46-
virtual bool try_stop_all_commands() = 0;
4746
virtual void stop_all_commands() = 0;
4847

4948
// Tell the device that the next command should replace the command queue.
@@ -53,7 +52,6 @@ class BotBaseController{
5352
//
5453
// Once this function is called, all commands before it are no longer
5554
// guaranteed to finish even if no command interrupts it.
56-
virtual bool try_next_command_interrupt() = 0;
5755
virtual void next_command_interrupt() = 0;
5856

5957
public:

ClientSource/Connection/PABotBase.cpp

Lines changed: 23 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -170,34 +170,6 @@ void PABotBase::wait_for_all_requests(const Cancellable* cancelled){
170170

171171
// m_logger.log("Waiting for all requests to finish... Completed.", COLOR_DARKGREEN);
172172
}
173-
174-
bool PABotBase::try_stop_all_commands(){
175-
176-
#ifdef DEBUG_STACK_TRACE
177-
cout << __FILE__ << ":" << __LINE__ << " PABotBase::try_stop_all_commands()" << endl;
178-
cout << "-----------------------------------------------------------------------------------------" << endl;
179-
#if defined(__APPLE__)
180-
void* callstack[128];
181-
int i, frames = backtrace(callstack, 128);
182-
char** strs = backtrace_symbols(callstack, frames);
183-
for (i = 0; i < frames; ++i) {
184-
cout << strs[i] << endl;
185-
}
186-
free(strs);
187-
#endif
188-
cout << "-----------------------------------------------------------------------------------------" << endl;
189-
#endif
190-
191-
auto scope_check = m_sanitizer.check_scope();
192-
193-
uint64_t seqnum = try_issue_request(nullptr, SerialPABotBase::DeviceRequest_request_stop(), true);
194-
if (seqnum != 0){
195-
clear_all_active_commands(seqnum);
196-
return true;
197-
}else{
198-
return false;
199-
}
200-
}
201173
void PABotBase::stop_all_commands(){
202174

203175
#ifdef DEBUG_STACK_TRACE
@@ -217,24 +189,13 @@ void PABotBase::stop_all_commands(){
217189

218190
auto scope_check = m_sanitizer.check_scope();
219191

220-
uint64_t seqnum = issue_request(nullptr, SerialPABotBase::DeviceRequest_request_stop(), true);
192+
uint64_t seqnum = issue_request(nullptr, SerialPABotBase::DeviceRequest_request_stop(), true, true);
221193
clear_all_active_commands(seqnum);
222194
}
223-
bool PABotBase::try_next_command_interrupt(){
224-
auto scope_check = m_sanitizer.check_scope();
225-
226-
uint64_t seqnum = try_issue_request(nullptr, SerialPABotBase::DeviceRequest_next_command_interrupt(), true);
227-
if (seqnum != 0){
228-
clear_all_active_commands(seqnum);
229-
return true;
230-
}else{
231-
return false;
232-
}
233-
}
234195
void PABotBase::next_command_interrupt(){
235196
auto scope_check = m_sanitizer.check_scope();
236197

237-
uint64_t seqnum = issue_request(nullptr, SerialPABotBase::DeviceRequest_next_command_interrupt(), true);
198+
uint64_t seqnum = issue_request(nullptr, SerialPABotBase::DeviceRequest_next_command_interrupt(), true, true);
238199
clear_all_active_commands(seqnum);
239200
}
240201
void PABotBase::clear_all_active_commands(uint64_t seqnum){
@@ -649,7 +610,8 @@ void PABotBase::retransmit_thread(){
649610

650611
uint64_t PABotBase::try_issue_request(
651612
const Cancellable* cancelled,
652-
const BotBaseRequest& request, bool silent_remove
613+
const BotBaseRequest& request,
614+
bool silent_remove, bool do_not_block
653615
){
654616
auto scope_check = m_sanitizer.check_scope();
655617

@@ -677,14 +639,20 @@ uint64_t PABotBase::try_issue_request(
677639
size_t queue_limit = m_max_pending_requests.load(std::memory_order_relaxed);
678640

679641
// Too many unacked requests in flight.
680-
if (inflight_requests() >= queue_limit){
642+
if (!do_not_block && inflight_requests() >= queue_limit){
681643
// m_logger.log("Message throttled due to too many inflight requests.");
682644
return 0;
683645
}
684646

685647
// Don't get too far ahead of the oldest seqnum.
686648
uint64_t seqnum = m_send_seq;
687649
if (seqnum - oldest_live_seqnum() > MAX_SEQNUM_GAP){
650+
if (do_not_block){
651+
throw ConnectionException(
652+
&m_logger,
653+
"Connection has stalled for a long time. Assuming it is dead."
654+
);
655+
}
688656
return 0;
689657
}
690658

@@ -722,7 +690,8 @@ uint64_t PABotBase::try_issue_request(
722690
}
723691
uint64_t PABotBase::try_issue_command(
724692
const Cancellable* cancelled,
725-
const BotBaseRequest& request, bool silent_remove
693+
const BotBaseRequest& request,
694+
bool silent_remove
726695
){
727696
auto scope_check = m_sanitizer.check_scope();
728697

@@ -801,11 +770,12 @@ uint64_t PABotBase::try_issue_command(
801770
}
802771
uint64_t PABotBase::issue_request(
803772
const Cancellable* cancelled,
804-
const BotBaseRequest& request, bool silent_remove
773+
const BotBaseRequest& request,
774+
bool silent_remove, bool do_not_block
805775
){
806776
auto scope_check = m_sanitizer.check_scope();
807777

808-
// Issue a request or a command and return.
778+
// Issue a request and return.
809779
//
810780
// If it cannot be issued (because we're over the limits), this function
811781
// will wait until it can be issued.
@@ -824,7 +794,7 @@ uint64_t PABotBase::issue_request(
824794
//
825795

826796
while (true){
827-
uint64_t seqnum = try_issue_request(cancelled, request, silent_remove);
797+
uint64_t seqnum = try_issue_request(cancelled, request, silent_remove, do_not_block);
828798
if (seqnum != 0){
829799
return seqnum;
830800
}
@@ -843,11 +813,12 @@ uint64_t PABotBase::issue_request(
843813
}
844814
uint64_t PABotBase::issue_command(
845815
const Cancellable* cancelled,
846-
const BotBaseRequest& request, bool silent_remove
816+
const BotBaseRequest& request,
817+
bool silent_remove
847818
){
848819
auto scope_check = m_sanitizer.check_scope();
849820

850-
// Issue a request or a command and return.
821+
// Issue a command and return.
851822
//
852823
// If it cannot be issued (because we're over the limits), this function
853824
// will wait until it can be issued.
@@ -891,7 +862,7 @@ bool PABotBase::try_issue_request(
891862
auto scope_check = m_sanitizer.check_scope();
892863

893864
if (!request.is_command()){
894-
return try_issue_request(cancelled, request, true) != 0;
865+
return try_issue_request(cancelled, request, true, false) != 0;
895866
}else{
896867
return try_issue_command(cancelled, request, true) != 0;
897868
}
@@ -903,7 +874,7 @@ void PABotBase::issue_request(
903874
auto scope_check = m_sanitizer.check_scope();
904875

905876
if (!request.is_command()){
906-
issue_request(cancelled, request, true);
877+
issue_request(cancelled, request, true, false);
907878
}else{
908879
issue_command(cancelled, request, true);
909880
}
@@ -919,7 +890,7 @@ BotBaseMessage PABotBase::issue_request_and_wait(
919890
throw InternalProgramError(&m_logger, PA_CURRENT_FUNCTION, "This function only supports requests.");
920891
}
921892

922-
uint64_t seqnum = issue_request(cancelled, request, false);
893+
uint64_t seqnum = issue_request(cancelled, request, false, false);
923894
return wait_for_request(seqnum, cancelled);
924895
}
925896
BotBaseMessage PABotBase::wait_for_request(uint64_t seqnum, const Cancellable* cancelled){

ClientSource/Connection/PABotBase.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,7 @@ class PABotBase : public BotBaseController, private PABotBaseConnection{
7979
// Basic Requests
8080

8181
virtual void wait_for_all_requests(const Cancellable* cancelled = nullptr) override;
82-
83-
virtual bool try_stop_all_commands() override;
8482
virtual void stop_all_commands() override;
85-
86-
virtual bool try_next_command_interrupt() override;
8783
virtual void next_command_interrupt() override;
8884

8985

@@ -141,21 +137,25 @@ class PABotBase : public BotBaseController, private PABotBaseConnection{
141137
// Returns the seqnum of the request. If failed, returns zero.
142138
uint64_t try_issue_request(
143139
const Cancellable* cancelled,
144-
const BotBaseRequest& request, bool silent_remove
140+
const BotBaseRequest& request,
141+
bool silent_remove, bool do_not_block
145142
);
146143
uint64_t try_issue_command(
147144
const Cancellable* cancelled,
148-
const BotBaseRequest& request, bool silent_remove
145+
const BotBaseRequest& request,
146+
bool silent_remove
149147
);
150148

151149
// Returns the seqnum of the request.
152150
uint64_t issue_request(
153151
const Cancellable* cancelled,
154-
const BotBaseRequest& request, bool silent_remove
152+
const BotBaseRequest& request,
153+
bool silent_remove, bool do_not_block
155154
);
156155
uint64_t issue_command(
157156
const Cancellable* cancelled,
158-
const BotBaseRequest& request, bool silent_remove
157+
const BotBaseRequest& request,
158+
bool silent_remove
159159
);
160160

161161
public:

SerialPrograms/Source/Tests/TestUtils.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,7 @@ class DummyBotBase : public BotBaseController{
6565
virtual void notify_all() override{}
6666

6767
virtual void wait_for_all_requests(const Cancellable* cancelled = nullptr) override {}
68-
69-
virtual bool try_stop_all_commands() override { return true; }
7068
virtual void stop_all_commands() override {}
71-
72-
virtual bool try_next_command_interrupt() override { return true; }
7369
virtual void next_command_interrupt() override {};
7470

7571
virtual bool try_issue_request(

0 commit comments

Comments
 (0)