@@ -5,29 +5,42 @@ import (
	"encoding/base64"
	"encoding/json"
	"fmt"
+	"strings"
+	"time"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

+	"github.com/jaypipes/ghw/pkg/cpu"
+	"github.com/jaypipes/ghw/pkg/topology"
+
	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/utils/cpuset"
	"k8s.io/utils/ptr"

+	"sigs.k8s.io/controller-runtime/pkg/client"
+
	igntypes "github.com/coreos/ignition/v2/config/v3_2/types"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"
	performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
+	"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
	profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
	testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"

	testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/images"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
	testlog "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/log"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"
-
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/pods"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/poolname"
+
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate"
)
@@ -39,10 +52,36 @@ const (
	fileMode = 0420
)

+const (
+	// a conservative minimum; no known supported hardware has L3 groups this small, they are all much bigger
+	expectedMinL3GroupSize = 8
+)
+
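+// Machine holds the ghw CPU and topology information reported by a node's machineinfo output.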
+type Machine struct {
+	CPU      *cpu.Info      `json:"cpu"`
+	Topology *topology.Info `json:"topology"`
+}
+
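+// CacheInfo describes a single cache instance: the NUMA node it belongs to, its level, and the CPUs sharing it.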
+type CacheInfo struct {
+	NodeID int
+	Level  int
+	CPUs   cpuset.CPUSet
+}
+
+func (ci CacheInfo) String() string {
+	return fmt.Sprintf("NUMANode=%d cacheLevel=%d cpus=<%s>", ci.NodeID, ci.Level, ci.CPUs.String())
+}
+
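+// MachineData pairs the raw machine info for a node with its precomputed cache layout.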
+type MachineData struct {
+	Info   Machine
+	Caches []CacheInfo
+}
+
var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.OpenShift)), Ordered, func() {
	var (
		workerRTNodes      []corev1.Node
-		perfProfile        *performancev2.PerformanceProfile
+		machineDatas       map[string]MachineData            // nodeName -> MachineData
+		perfProfile        *performancev2.PerformanceProfile // original perf profile
		performanceMCP     string
		err                error
		profileAnnotations map[string]string
@@ -59,11 +98,23 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
		Expect(err).ToNot(HaveOccurred())

		workerRTNodes, err = nodes.MatchingOptionalSelector(workerRTNodes)
-		Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error looking for the optional selector: %v", err))
+		Expect(err).ToNot(HaveOccurred(), "error looking for the optional selector: %v", err)
+
+		if len(workerRTNodes) < 1 {
+			Skip("need at least a worker node")
+		}

		perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
		Expect(err).ToNot(HaveOccurred())

+		By(fmt.Sprintf("collecting machine infos for %d nodes", len(workerRTNodes)))
+		machineDatas, err = collectMachineDatas(ctx, workerRTNodes)
+		Expect(err).ToNot(HaveOccurred())
+
+		for node, data := range machineDatas {
+			testlog.Infof("node=%q data=%v", node, data.Caches)
+		}
+
		performanceMCP, err = mcps.GetByProfile(perfProfile)
		Expect(err).ToNot(HaveOccurred())

@@ -91,35 +142,38 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
		// Apply annotation to enable the align-cpus-by-uncorecache cpumanager policy option
		if perfProfile.Annotations == nil || perfProfile.Annotations["kubeletconfig.experimental"] != llcPolicy {
			testlog.Info("Enable align-cpus-by-uncorecache cpumanager policy")
-			perfProfile.Annotations = profileAnnotations
+			prof := perfProfile.DeepCopy()
+			prof.Annotations = profileAnnotations

			By("updating performance profile")
-			profiles.UpdateWithRetry(perfProfile)
+			profiles.UpdateWithRetry(prof)

			By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
-			profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
+			profilesupdate.WaitForTuningUpdating(ctx, prof)

			By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
-			profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
+			profilesupdate.WaitForTuningUpdated(ctx, prof)
		}
-
	})

	AfterAll(func() {
+		if perfProfile == nil {
+			return // nothing to do!
+		}

		// Delete the machine config created to enable the uncore cache cpumanager policy option
		// first make sure the profile doesn't have the annotation
		ctx := context.Background()
-		perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
-		perfProfile.Annotations = nil
+		prof := perfProfile.DeepCopy()
+		prof.Annotations = nil
		By("updating performance profile")
-		profiles.UpdateWithRetry(perfProfile)
+		profiles.UpdateWithRetry(prof)

		By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
-		profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
+		profilesupdate.WaitForTuningUpdating(ctx, prof)

		By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
-		profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
+		profilesupdate.WaitForTuningUpdated(ctx, prof)

		// delete the machine config pool
		Expect(testclient.Client.Delete(ctx, mc)).To(Succeed())
@@ -191,6 +245,81 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
			})
		})
	})
+
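+	// Runtime checks: verify that exclusive CPUs handed to a guaranteed pod land within a single
+	// L3 (LLC) group when the request fits in one group, and cannot be L3-aligned when it does not.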
+	Context("Runtime Tests", func() {
+		var (
+			targetNodeName    string      // pick one node to simplify our testing - e.g. to know the expected L3 group size ahead of time
+			targetNodeInfo    MachineData // shortcut. Note: **SHALLOW COPY**
+			targetL3GroupSize int
+
+			testPod *corev1.Pod
+		)
+
+		BeforeEach(func() {
+			targetNodeName = workerRTNodes[0].Name // arbitrarily pick the first worker node
+			var ok bool
+			targetNodeInfo, ok = machineDatas[targetNodeName]
+			Expect(ok).To(BeTrue(), "unknown machine data for node %q", targetNodeName)

+			targetL3GroupSize = expectedL3GroupSize(targetNodeInfo)
+			// sanity check: no known supported hardware has L3 groups smaller than expectedMinL3GroupSize
+			Expect(targetL3GroupSize).Should(BeNumerically(">", expectedMinL3GroupSize), "L3 Group size too small: %d", targetL3GroupSize)
+		})
+
+		// TODO move to DeferCleanup?
+		AfterEach(func() {
+			if testPod == nil {
+				return
+			}
+			ctx := context.Background()
+			testlog.Infof("deleting pod %q", testPod.Name)
+			deleteTestPod(ctx, testPod)
+		})
+
+		It("should align containers which request less than a L3 group size exclusive CPUs", func(ctx context.Context) {
+			askingCPUs := expectedMinL3GroupSize
+
+			By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
+			testPod = makePod(targetNodeName, askingCPUs)
+			Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())
+
+			By("Waiting for the guaranteed pod to be ready")
+			testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+			Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+
+			logs, err := pods.GetLogs(testclient.K8sClient, testPod)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")
+
+			allocatedCPUs, err := cpuset.Parse(logs)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
+			Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)
+
+			ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
+			Expect(ok).To(BeTrue(), "pod does not have L3-aligned CPUs") // TODO log what?
+		})
+
+		It("cannot align containers which request more than a L3 group size exclusive CPUs", func(ctx context.Context) {
+			askingCPUs := targetL3GroupSize + 2 // TODO: to be really safe we should add SMT level cpus
+
+			By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
+			testPod = makePod(targetNodeName, askingCPUs)
+			Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())
+
+			By("Waiting for the guaranteed pod to be ready")
+			testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+			Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+
+			logs, err := pods.GetLogs(testclient.K8sClient, testPod)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")
+
+			allocatedCPUs, err := cpuset.Parse(logs)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
+			Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)
+
+			ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
+			Expect(ok).To(BeFalse(), "pod exceeds L3 group capacity so it cannot have L3-aligned CPUs") // TODO log what?
+		})
+	})
})

// create the MachineConfig that writes the text file required to enable the prefer-align-cpus-by-uncorecache policy option
@@ -239,3 +368,137 @@ func addContent(ignitionConfig *igntypes.Config, content []byte, dst string, mod
		},
	})
}
+
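+// MachineFromJSON decodes the machineinfo JSON output into a Machine.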
+func MachineFromJSON(data string) (Machine, error) {
+	ma := Machine{}
+	rd := strings.NewReader(data)
+	err := json.NewDecoder(rd).Decode(&ma)
+	return ma, err
+}
+
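+// isCPUSetLLCAligned reports whether cpus fits entirely inside one of the given cache groups,
+// returning the matching group when it does.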
+func isCPUSetLLCAligned(infos []CacheInfo, cpus cpuset.CPUSet) (bool, *CacheInfo) {
+	for idx := range infos {
+		info := &infos[idx]
+		if cpus.IsSubsetOf(info.CPUs) {
+			return true, info
+		}
+	}
+	return false, nil
+}
+
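+// computeLLCLayout collects every cache of level 3 or higher from the machine topology as a CacheInfo entry.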
+func computeLLCLayout(mi Machine) []CacheInfo {
+	ret := []CacheInfo{}
+	for _, node := range mi.Topology.Nodes {
+		for _, cache := range node.Caches {
+			if cache.Level < 3 { // TODO
+				continue
+			}
+			ret = append(ret, CacheInfo{
+				NodeID: node.ID,
+				Level:  int(cache.Level),
+				CPUs:   cpusetFromLogicalProcessors(cache.LogicalProcessors...),
+			})
+		}
+	}
+	return ret
+}
+
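+// cpusetFromLogicalProcessors converts ghw logical processor IDs into a cpuset.CPUSet.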
+func cpusetFromLogicalProcessors(procs ...uint32) cpuset.CPUSet {
+	cpuList := make([]int, 0, len(procs))
+	for _, proc := range procs {
+		cpuList = append(cpuList, int(proc))
+	}
+	return cpuset.New(cpuList...)
+}
+
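+// expectedL3GroupSize returns the CPU count of the first L3 group found, assuming all L3 groups on the machine are equally sized.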
+func expectedL3GroupSize(md MachineData) int {
+	// TODO: we assume all L3 Groups are equal in size.
+	for idx := range md.Caches {
+		cache := &md.Caches[idx]
+		if cache.Level != 3 {
+			continue
+		}
+		return cache.CPUs.Size()
+	}
+	return 0
+}
+
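+// collectMachineDatas runs /usr/bin/machineinfo on every node and returns the parsed data keyed by node name.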
+func collectMachineDatas(ctx context.Context, nodeList []corev1.Node) (map[string]MachineData, error) {
+	cmd := []string{"/usr/bin/machineinfo"}
+	infos := make(map[string]MachineData)
+	for idx := range nodeList {
+		node := &nodeList[idx]
+		out, err := nodes.ExecCommand(ctx, node, cmd)
+		if err != nil {
+			return infos, err
+		}
+
+		info, err := MachineFromJSON(string(out))
+		if err != nil {
+			return infos, err
+		}
+
+		infos[node.Name] = MachineData{
+			Info:   info,
+			Caches: computeLLCLayout(info), // precompute
+		}
+	}
+	return infos, nil
+}
+
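+// makePod builds a guaranteed-QoS test pod pinned to nodeName that prints its effective cpuset and then sleeps,
+// running under the performance profile's runtime class.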
+func makePod(nodeName string, guaranteedCPUs int) *corev1.Pod {
+	testPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "test-",
+			Labels: map[string]string{
+				"test": "",
+			},
+			Namespace: testutils.NamespaceTesting,
+		},
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{
+				{
+					Name:  "test",
+					Image: images.Test(),
+					Command: []string{
+						"/bin/sh", "-c", "cat /sys/fs/cgroup/cpuset.cpus.effective && sleep 10h",
+					},
+				},
+			},
+			NodeName: nodeName,
+			NodeSelector: map[string]string{
+				testutils.LabelHostname: nodeName,
+			},
+		},
+	}
+	if guaranteedCPUs > 0 {
+		testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				corev1.ResourceCPU:    *resource.NewQuantity(int64(guaranteedCPUs), resource.DecimalSI),
+				corev1.ResourceMemory: resource.MustParse("256Mi"),
+			},
+		}
+	}
+	profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
+	runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
+	testPod.Spec.RuntimeClassName = &runtimeClass
+	return testPod
+}
+
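+// deleteTestPod deletes the pod if it still exists and waits for its removal;
+// it returns false when the pod was already gone.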
+func deleteTestPod(ctx context.Context, testpod *corev1.Pod) bool {
+	GinkgoHelper()
+
+	// it is possible that the pod was already deleted as part of the test; in that case we want to skip the teardown
+	err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
+	if apierrors.IsNotFound(err) {
+		return false
+	}
+
+	err = testclient.DataPlaneClient.Delete(ctx, testpod)
+	Expect(err).ToNot(HaveOccurred())
+
+	err = pods.WaitForDeletion(ctx, testpod, pods.DefaultDeletionTimeout*time.Second)
+	Expect(err).ToNot(HaveOccurred())
+
+	return true
+}