Skip to content

Commit 1bf6210

Browse files
author
Flavio Crisciani
committed
Fix sandbox cleanup
Driver and Sandbox have 2 different stores where the endpoints are saved. It is possible that the 2 stores go out of sync if the endpoint is added to the driver but there is a crash before the sandbox join. On restart, we now take the list of endpoints from the network and assign them back to the sandbox. Signed-off-by: Flavio Crisciani <[email protected]>
1 parent 73f58e1 commit 1bf6210

File tree

2 files changed

+43
-23
lines changed

2 files changed

+43
-23
lines changed

endpoint.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -822,10 +822,6 @@ func (ep *endpoint) Delete(force bool) error {
822822
}
823823
}
824824

825-
if err = n.getController().deleteFromStore(ep); err != nil {
826-
return err
827-
}
828-
829825
defer func() {
830826
if err != nil && !force {
831827
ep.dbExists = false
@@ -842,6 +838,11 @@ func (ep *endpoint) Delete(force bool) error {
842838
return err
843839
}
844840

841+
// This has to come after the sandbox and the driver to guarantee that the store can be the source of truth on restart cases
842+
if err = n.getController().deleteFromStore(ep); err != nil {
843+
return err
844+
}
845+
845846
ep.releaseAddress()
846847

847848
if err := n.getEpCnt().DecEndpointCnt(); err != nil {

sandbox_store.go

+38-19
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package libnetwork
33
import (
44
"container/heap"
55
"encoding/json"
6-
"sync"
76

87
"github.com/Sirupsen/logrus"
98
"github.com/docker/libnetwork/datastore"
@@ -210,6 +209,40 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) {
210209
return
211210
}
212211

212+
// Get all the endpoints
213+
// Use the network as the source of truth so that if there was an issue before the sandbox registered the endpoint
214+
// this will be taken anyway
215+
endpointsInSandboxID := map[string][]*endpoint{}
216+
nl, err := c.getNetworksForScope(datastore.LocalScope)
217+
if err != nil {
218+
logrus.Warnf("Could not get list of networks during sandbox cleanup: %v", err)
219+
return
220+
}
221+
222+
for _, n := range nl {
223+
var epl []*endpoint
224+
epl, err = n.getEndpointsFromStore()
225+
if err != nil {
226+
logrus.Warnf("Could not get list of endpoints in network %s during sandbox cleanup: %v", n.name, err)
227+
continue
228+
}
229+
for _, ep := range epl {
230+
ep, err = n.getEndpointFromStore(ep.id)
231+
if err != nil {
232+
logrus.Warnf("Could not get endpoint in network %s during sandbox cleanup: %v", n.name, err)
233+
continue
234+
}
235+
if ep.sandboxID == "" {
236+
logrus.Warnf("Endpoint %s not associated to any sandbox, deleting it", ep.id)
237+
ep.Delete(true)
238+
continue
239+
}
240+
241+
// Append the endpoint to the corresponding sandboxID
242+
endpointsInSandboxID[ep.sandboxID] = append(endpointsInSandboxID[ep.sandboxID], ep)
243+
}
244+
}
245+
213246
for _, kvo := range kvol {
214247
sbs := kvo.(*sbState)
215248

@@ -256,25 +289,11 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) {
256289
c.sandboxes[sb.id] = sb
257290
c.Unlock()
258291

259-
for _, eps := range sbs.Eps {
260-
n, err := c.getNetworkFromStore(eps.Nid)
261-
var ep *endpoint
262-
if err != nil {
263-
logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err)
264-
n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}, persist: true}
265-
ep = &endpoint{id: eps.Eid, network: n, sandboxID: sbs.ID}
266-
} else {
267-
ep, err = n.getEndpointFromStore(eps.Eid)
268-
if err != nil {
269-
logrus.Errorf("getEndpointFromStore for eid %s failed while trying to build sandbox for cleanup: %v", eps.Eid, err)
270-
ep = &endpoint{id: eps.Eid, network: n, sandboxID: sbs.ID}
271-
}
272-
}
273-
if _, ok := activeSandboxes[sb.ID()]; ok && err != nil {
274-
logrus.Errorf("failed to restore endpoint %s in %s for container %s due to %v", eps.Eid, eps.Nid, sb.ContainerID(), err)
275-
continue
292+
// Restore all the endpoints that are supposed to be in this sandbox
293+
if eps, ok := endpointsInSandboxID[sb.id]; ok {
294+
for _, ep := range eps {
295+
heap.Push(&sb.endpoints, ep)
276296
}
277-
heap.Push(&sb.endpoints, ep)
278297
}
279298

280299
if _, ok := activeSandboxes[sb.ID()]; !ok {

0 commit comments

Comments
 (0)