
Commit 70cfaa3

e2e: llc: initial tests for cpu allocation
bootstrap the tests which actually run pods and check the CPU (and thus the L3/LLC) allocation. Start with the trivial sanity tests, adding a good chunk of required infra utilities along the way.

Signed-off-by: Francesco Romani <[email protected]>
1 parent 4f58be1 commit 70cfaa3
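
For context, the core check these new tests perform is that a guaranteed container's effective cpuset fits entirely inside a single L3 (last-level cache) group. A minimal, self-contained sketch of that check follows; the CPU numbers and group sizes are invented for illustration, and only k8s.io/utils/cpuset (already used by the commit) is assumed.

package main

import (
    "fmt"

    "k8s.io/utils/cpuset"
)

func main() {
    // Hypothetical LLC layout: two L3 groups of 8 CPUs each.
    llcGroups := []cpuset.CPUSet{
        cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
        cpuset.New(8, 9, 10, 11, 12, 13, 14, 15),
    }

    // What the test pod would print from /sys/fs/cgroup/cpuset.cpus.effective.
    allocated, err := cpuset.Parse("2-5")
    if err != nil {
        panic(err)
    }

    // Aligned means: the whole allocation is a subset of one L3 group.
    aligned := false
    for _, group := range llcGroups {
        if allocated.IsSubsetOf(group) {
            aligned = true
            break
        }
    }
    fmt.Printf("cpus=%s aligned=%v\n", allocated.String(), aligned) // cpus=2-5 aligned=true
}

In the actual tests below, the allocated set comes from the pod's own logs (the container cats /sys/fs/cgroup/cpuset.cpus.effective) and the LLC groups come from per-node topology data collected once in BeforeAll.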

File tree: 1 file changed, +276 -13 lines changed

  • test/e2e/performanceprofile/functests/13_llc

test/e2e/performanceprofile/functests/13_llc/llc.go

Lines changed: 276 additions & 13 deletions
@@ -5,29 +5,42 @@ import (
     "encoding/base64"
     "encoding/json"
     "fmt"
+    "strings"
+    "time"

     . "github.com/onsi/ginkgo/v2"
     . "github.com/onsi/gomega"

+    "github.com/jaypipes/ghw/pkg/cpu"
+    "github.com/jaypipes/ghw/pkg/topology"
+
     corev1 "k8s.io/api/core/v1"
+    apierrors "k8s.io/apimachinery/pkg/api/errors"
+    "k8s.io/apimachinery/pkg/api/resource"
     "k8s.io/apimachinery/pkg/runtime"
+    "k8s.io/utils/cpuset"
     "k8s.io/utils/ptr"

+    "sigs.k8s.io/controller-runtime/pkg/client"
+
     igntypes "github.com/coreos/ignition/v2/config/v3_2/types"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

     machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"
     performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
+    "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
     profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
     testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"

     testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
+    "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/images"
     "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
     testlog "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/log"
     "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
     "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"
-
+    "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/pods"
     "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/poolname"
+
     "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
     "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate"
 )
@@ -39,10 +52,36 @@ const (
     fileMode = 0420
 )

+const (
+    // random number corresponding to the minimum we need. No known supported hardware has groups so little, they are all way bigger
+    expectedMinL3GroupSize = 8
+)
+
+type Machine struct {
+    CPU      *cpu.Info      `json:"cpu"`
+    Topology *topology.Info `json:"topology"`
+}
+
+type CacheInfo struct {
+    NodeID int
+    Level  int
+    CPUs   cpuset.CPUSet
+}
+
+func (ci CacheInfo) String() string {
+    return fmt.Sprintf("NUMANode=%d cacheLevel=%d cpus=<%s>", ci.NodeID, ci.Level, ci.CPUs.String())
+}
+
+type MachineData struct {
+    Info   Machine
+    Caches []CacheInfo
+}
+
 var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.OpenShift)), Ordered, func() {
     var (
         workerRTNodes      []corev1.Node
-        perfProfile        *performancev2.PerformanceProfile
+        machineDatas       map[string]MachineData            // nodeName -> MachineData
+        perfProfile        *performancev2.PerformanceProfile // original perf profile
         performanceMCP     string
         err                error
         profileAnnotations map[string]string
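
The Machine, CacheInfo and MachineData types above model the per-node cache layout that the tests compare allocations against. A short, hypothetical illustration of what that layout looks like for a node whose single NUMA node is split into two 8-CPU L3 groups; all values are invented, and CacheInfo is copied from the diff so the snippet stands alone.

package main

import (
    "fmt"

    "k8s.io/utils/cpuset"
)

// CacheInfo mirrors the type introduced by this commit.
type CacheInfo struct {
    NodeID int
    Level  int
    CPUs   cpuset.CPUSet
}

func (ci CacheInfo) String() string {
    return fmt.Sprintf("NUMANode=%d cacheLevel=%d cpus=<%s>", ci.NodeID, ci.Level, ci.CPUs.String())
}

func main() {
    // Invented layout: NUMA node 0 carries two level-3 groups of 8 CPUs each.
    caches := []CacheInfo{
        {NodeID: 0, Level: 3, CPUs: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7)},
        {NodeID: 0, Level: 3, CPUs: cpuset.New(8, 9, 10, 11, 12, 13, 14, 15)},
    }
    for _, ci := range caches {
        fmt.Println(ci) // e.g. NUMANode=0 cacheLevel=3 cpus=<0-7>
    }
}

This is the shape logged per node in the BeforeAll block of the next hunk.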
@@ -59,11 +98,23 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
         Expect(err).ToNot(HaveOccurred())

         workerRTNodes, err = nodes.MatchingOptionalSelector(workerRTNodes)
-        Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error looking for the optional selector: %v", err))
+        Expect(err).ToNot(HaveOccurred(), "error looking for the optional selector: %v", err)
+
+        if len(workerRTNodes) < 1 {
+            Skip("need at least a worker node")
+        }

         perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
         Expect(err).ToNot(HaveOccurred())

+        By(fmt.Sprintf("collecting machine infos for %d nodes", len(workerRTNodes)))
+        machineDatas, err = collectMachineDatas(ctx, workerRTNodes)
+        Expect(err).ToNot(HaveOccurred())
+
+        for node, data := range machineDatas {
+            testlog.Infof("node=%q data=%v", node, data.Caches)
+        }
+
         performanceMCP, err = mcps.GetByProfile(perfProfile)
         Expect(err).ToNot(HaveOccurred())

@@ -91,35 +142,38 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
         // Apply Annotation to enable align-cpu-by-uncorecache cpumanager policy option
         if perfProfile.Annotations == nil || perfProfile.Annotations["kubeletconfig.experimental"] != llcPolicy {
             testlog.Info("Enable align-cpus-by-uncorecache cpumanager policy")
-            perfProfile.Annotations = profileAnnotations
+            prof := perfProfile.DeepCopy()
+            prof.Annotations = profileAnnotations

             By("updating performance profile")
-            profiles.UpdateWithRetry(perfProfile)
+            profiles.UpdateWithRetry(prof)

             By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
-            profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
+            profilesupdate.WaitForTuningUpdating(ctx, prof)

             By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
-            profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
+            profilesupdate.WaitForTuningUpdated(ctx, prof)
         }
-
     })

     AfterAll(func() {
+        if perfProfile == nil {
+            return //nothing to do!
+        }

         // Delete machine config created to enable uncocre cache cpumanager policy option
         // first make sure the profile doesn't have the annotation
         ctx := context.Background()
-        perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
-        perfProfile.Annotations = nil
+        prof := perfProfile.DeepCopy()
+        prof.Annotations = nil
         By("updating performance profile")
-        profiles.UpdateWithRetry(perfProfile)
+        profiles.UpdateWithRetry(prof)

         By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
-        profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
+        profilesupdate.WaitForTuningUpdating(ctx, prof)

         By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
-        profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
+        profilesupdate.WaitForTuningUpdated(ctx, prof)

         // delete the machine config pool
         Expect(testclient.Client.Delete(ctx, mc)).To(Succeed())
@@ -191,6 +245,81 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
             })
         })
     })
+
+    Context("Runtime Tests", func() {
+        var (
+            targetNodeName    string      // pick a random node to simplify our testing - e.g. to know ahead of time expected L3 group size
+            targetNodeInfo    MachineData // shortcut. Note: **SHALLOW COPY**
+            targetL3GroupSize int
+
+            testPod *corev1.Pod
+        )
+
+        BeforeEach(func() {
+            targetNodeName = workerRTNodes[0].Name // pick random node
+            var ok bool
+            targetNodeInfo, ok = machineDatas[targetNodeName]
+            Expect(ok).To(BeTrue(), "unknown machine data for node %q", targetNodeName)
+
+            targetL3GroupSize = expectedL3GroupSize(targetNodeInfo)
+            // random number corresponding to the minimum we need. No known supported hardware has groups so little, they are all way bigger
+            Expect(targetL3GroupSize).Should(BeNumerically(">", expectedMinL3GroupSize), "L3 Group size too small: %d", targetL3GroupSize)
+        })
+
+        // TODO move to DeferCleanup?
+        AfterEach(func() {
+            if testPod == nil {
+                return
+            }
+            ctx := context.Background()
+            testlog.Infof("deleting pod %q", testPod.Name)
+            deleteTestPod(ctx, testPod)
+        })
+
+        It("should align containers which request less than a L3 group size exclusive CPUs", func(ctx context.Context) {
+            askingCPUs := expectedMinL3GroupSize
+
+            By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
+            testPod = makePod(targetNodeName, askingCPUs)
+            Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())
+
+            By("Waiting for the guaranteed pod to be ready")
+            testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+            Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+
+            logs, err := pods.GetLogs(testclient.K8sClient, testPod)
+            Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")
+
+            allocatedCPUs, err := cpuset.Parse(logs)
+            Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
+            Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)
+
+            ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
+            Expect(ok).To(BeTrue(), "pod has not L3-aligned CPUs") // TODO log what?
+        })
+
+        It("cannot align containers which request more than a L3 group size exclusive CPUs", func(ctx context.Context) {
+            askingCPUs := targetL3GroupSize + 2 // TODO: to be really safe we should add SMT level cpus
+
+            By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
+            testPod = makePod(targetNodeName, askingCPUs)
+            Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())
+
+            By("Waiting for the guaranteed pod to be ready")
+            testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+            Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+
+            logs, err := pods.GetLogs(testclient.K8sClient, testPod)
+            Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")
+
+            allocatedCPUs, err := cpuset.Parse(logs)
+            Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
+            Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)
+
+            ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
+            Expect(ok).To(BeFalse(), "pod exceeds L3 group capacity so it cannot have L3-aligned CPUs") // TODO log what?
+        })
+    })
 })

 // create Machine config to create text file required to enable prefer-align-cpus-by-uncorecache policy option
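
The negative test in the hunk above relies on simple arithmetic: a request of targetL3GroupSize + 2 exclusive CPUs can never be a subset of any single L3 group, so the alignment check has to fail no matter which CPUs the kubelet hands out. A tiny standalone sketch with an assumed group size of 8:

package main

import (
    "fmt"

    "k8s.io/utils/cpuset"
)

func main() {
    groupSize := 8 // assumed L3 group size for the example
    llcGroups := []cpuset.CPUSet{
        cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
        cpuset.New(8, 9, 10, 11, 12, 13, 14, 15),
    }

    // groupSize+2 CPUs necessarily straddle at least two groups.
    allocated, _ := cpuset.Parse("0-9")

    aligned := false
    for _, group := range llcGroups {
        if allocated.IsSubsetOf(group) {
            aligned = true
        }
    }
    fmt.Printf("requested %d CPUs (%s): aligned=%v\n", groupSize+2, allocated, aligned) // aligned=false
}
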
@@ -239,3 +368,137 @@ func addContent(ignitionConfig *igntypes.Config, content []byte, dst string, mod
         },
     })
 }
+
+func MachineFromJSON(data string) (Machine, error) {
+    ma := Machine{}
+    rd := strings.NewReader(data)
+    err := json.NewDecoder(rd).Decode(&ma)
+    return ma, err
+}
+
+func isCPUSetLLCAligned(infos []CacheInfo, cpus cpuset.CPUSet) (bool, *CacheInfo) {
+    for idx := range infos {
+        info := &infos[idx]
+        if cpus.IsSubsetOf(info.CPUs) {
+            return true, info
+        }
+    }
+    return false, nil
+}
+
+func computeLLCLayout(mi Machine) []CacheInfo {
+    ret := []CacheInfo{}
+    for _, node := range mi.Topology.Nodes {
+        for _, cache := range node.Caches {
+            if cache.Level < 3 { // TODO
+                continue
+            }
+            ret = append(ret, CacheInfo{
+                NodeID: node.ID,
+                Level:  int(cache.Level),
+                CPUs:   cpusetFromLogicalProcessors(cache.LogicalProcessors...),
+            })
+        }
+    }
+    return ret
+}
+
+func cpusetFromLogicalProcessors(procs ...uint32) cpuset.CPUSet {
+    cpuList := make([]int, 0, len(procs))
+    for _, proc := range procs {
+        cpuList = append(cpuList, int(proc))
+    }
+    return cpuset.New(cpuList...)
+}
+
+func expectedL3GroupSize(md MachineData) int {
+    // TODO: we assume all L3 Groups are equal in size.
+    for idx := range md.Caches {
+        cache := &md.Caches[idx]
+        if cache.Level != 3 {
+            continue
+        }
+        return cache.CPUs.Size()
+    }
+    return 0
+}
+
+func collectMachineDatas(ctx context.Context, nodeList []corev1.Node) (map[string]MachineData, error) {
+    cmd := []string{"/usr/bin/machineinfo"}
+    infos := make(map[string]MachineData)
+    for idx := range nodeList {
+        node := &nodeList[idx]
+        out, err := nodes.ExecCommand(ctx, node, cmd)
+        if err != nil {
+            return infos, err
+        }
+
+        info, err := MachineFromJSON(string(out))
+        if err != nil {
+            return infos, err
+        }
+
+        infos[node.Name] = MachineData{
+            Info:   info,
+            Caches: computeLLCLayout(info), // precompute
+        }
+    }
+    return infos, nil
+}
+
+func makePod(nodeName string, guaranteedCPUs int) *corev1.Pod {
+    testPod := &corev1.Pod{
+        ObjectMeta: metav1.ObjectMeta{
+            GenerateName: "test-",
+            Labels: map[string]string{
+                "test": "",
+            },
+            Namespace: testutils.NamespaceTesting,
+        },
+        Spec: corev1.PodSpec{
+            Containers: []corev1.Container{
+                {
+                    Name:  "test",
+                    Image: images.Test(),
+                    Command: []string{
+                        "/bin/sh", "-c", "cat /sys/fs/cgroup/cpuset.cpus.effective && sleep 10h",
+                    },
+                },
+            },
+            NodeName: nodeName,
+            NodeSelector: map[string]string{
+                testutils.LabelHostname: nodeName,
+            },
+        },
+    }
+    if guaranteedCPUs > 0 {
+        testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
+            Limits: corev1.ResourceList{
+                corev1.ResourceCPU:    *resource.NewQuantity(int64(guaranteedCPUs), resource.DecimalSI),
+                corev1.ResourceMemory: resource.MustParse("256Mi"),
+            },
+        }
+    }
+    profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
+    runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
+    testPod.Spec.RuntimeClassName = &runtimeClass
+    return testPod
+}
+
+func deleteTestPod(ctx context.Context, testpod *corev1.Pod) bool {
+    GinkgoHelper()
+
+    // it possible that the pod already was deleted as part of the test, in this case we want to skip teardown
+    err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
+    if apierrors.IsNotFound(err) {
+        return false
+    }
+
+    err = testclient.DataPlaneClient.Delete(ctx, testpod)
+    Expect(err).ToNot(HaveOccurred())
+
+    err = pods.WaitForDeletion(ctx, testpod, pods.DefaultDeletionTimeout*time.Second)
+    Expect(err).ToNot(HaveOccurred())
+
+    return true
+}
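
The helpers in the hunk above bridge ghw's view of the topology (logical processor IDs reported per cache as []uint32) and the cpuset.CPUSet type the rest of the suite works with. A condensed, standalone sketch of that conversion and of how the expected L3 group size falls out of it; the CPU IDs are invented and toCPUSet merely mirrors cpusetFromLogicalProcessors from the commit.

package main

import (
    "fmt"

    "k8s.io/utils/cpuset"
)

// toCPUSet mirrors cpusetFromLogicalProcessors: each uint32 logical processor ID
// is converted to an int and collected into a cpuset.
func toCPUSet(procs ...uint32) cpuset.CPUSet {
    ids := make([]int, 0, len(procs))
    for _, p := range procs {
        ids = append(ids, int(p))
    }
    return cpuset.New(ids...)
}

func main() {
    // Invented logical processors of a single level-3 cache entry.
    l3 := toCPUSet(0, 1, 2, 3, 4, 5, 6, 7)
    // expectedL3GroupSize boils down to the size of the first level-3 group found,
    // under the commit's stated assumption that all L3 groups are equal in size.
    fmt.Printf("L3 group <%s> size=%d\n", l3, l3.Size())
}
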
