@@ -5,29 +5,42 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"strings"
+	"time"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 
+	"github.com/jaypipes/ghw/pkg/cpu"
+	"github.com/jaypipes/ghw/pkg/topology"
+
 	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/utils/cpuset"
 	"k8s.io/utils/ptr"
 
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
 	igntypes "github.com/coreos/ignition/v2/config/v3_2/types"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"
 	performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
+	"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
 	profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
 	testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
 
 	testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/images"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
 	testlog "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/log"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"
-
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/pods"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/poolname"
+
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate"
 )
@@ -39,10 +52,36 @@ const (
 	fileMode = 0420
 )
 
+const (
+	// arbitrary number corresponding to the minimum we need. No known supported hardware has L3 groups this small; they are all much bigger.
+	expectedMinL3GroupSize = 8
+)
+
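+// Machine holds the machine description decoded from the machineinfo tool output: ghw CPU and NUMA topology data.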
+type Machine struct {
+	CPU      *cpu.Info      `json:"cpu"`
+	Topology *topology.Info `json:"topology"`
+}
+
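+// CacheInfo describes a single cache group: the NUMA node it belongs to, the cache level, and the CPUs that share it.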
+type CacheInfo struct {
+	NodeID int
+	Level  int
+	CPUs   cpuset.CPUSet
+}
+
+func (ci CacheInfo) String() string {
+	return fmt.Sprintf("NUMANode=%d cacheLevel=%d cpus=<%s>", ci.NodeID, ci.Level, ci.CPUs.String())
+}
+
+type MachineData struct {
+	Info   Machine
+	Caches []CacheInfo
+}
+
 var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.OpenShift)), Ordered, func() {
 	var (
 		workerRTNodes      []corev1.Node
-		perfProfile        *performancev2.PerformanceProfile
+		machineDatas       map[string]MachineData            // nodeName -> MachineData
+		perfProfile        *performancev2.PerformanceProfile // original perf profile
 		performanceMCP     string
 		err                error
 		profileAnnotations map[string]string
@@ -52,18 +91,34 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
 	)
 
 	BeforeAll(func() {
+		var hasMachineData bool
 		profileAnnotations = make(map[string]string)
 		ctx := context.Background()
 
 		workerRTNodes, err = nodes.GetByLabels(testutils.NodeSelectorLabels)
 		Expect(err).ToNot(HaveOccurred())
 
 		workerRTNodes, err = nodes.MatchingOptionalSelector(workerRTNodes)
-		Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error looking for the optional selector: %v", err))
+		Expect(err).ToNot(HaveOccurred(), "error looking for the optional selector: %v", err)
+
+		if len(workerRTNodes) < 1 {
+			Skip("need at least a worker node")
+		}
 
 		perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
 		Expect(err).ToNot(HaveOccurred())
 
+		By(fmt.Sprintf("collecting machine infos for %d nodes", len(workerRTNodes)))
+		machineDatas, hasMachineData, err = collectMachineDatas(ctx, workerRTNodes)
+		if !hasMachineData {
+			Skip("need machinedata available - please check the image for the presence of the machineinfo tool")
+		}
+		Expect(err).ToNot(HaveOccurred())
+
+		for node, data := range machineDatas {
+			testlog.Infof("node=%q data=%v", node, data.Caches)
+		}
+
 		performanceMCP, err = mcps.GetByProfile(perfProfile)
 		Expect(err).ToNot(HaveOccurred())
 
@@ -91,35 +146,38 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
 		// Apply Annotation to enable align-cpu-by-uncorecache cpumanager policy option
 		if perfProfile.Annotations == nil || perfProfile.Annotations["kubeletconfig.experimental"] != llcPolicy {
 			testlog.Info("Enable align-cpus-by-uncorecache cpumanager policy")
-			perfProfile.Annotations = profileAnnotations
+			prof := perfProfile.DeepCopy()
+			prof.Annotations = profileAnnotations
 
 			By("updating performance profile")
-			profiles.UpdateWithRetry(perfProfile)
+			profiles.UpdateWithRetry(prof)
 
 			By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
-			profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
+			profilesupdate.WaitForTuningUpdating(ctx, prof)
 
 			By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
-			profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
+			profilesupdate.WaitForTuningUpdated(ctx, prof)
 		}
-
 	})
 
 	AfterAll(func() {
+		if perfProfile == nil {
+			return // nothing to do!
+		}
 
 		// Delete machine config created to enable uncore cache cpumanager policy option
 		// first make sure the profile doesn't have the annotation
 		ctx := context.Background()
-		perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
-		perfProfile.Annotations = nil
+		prof := perfProfile.DeepCopy()
+		prof.Annotations = nil
 		By("updating performance profile")
-		profiles.UpdateWithRetry(perfProfile)
+		profiles.UpdateWithRetry(prof)
 
 		By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
-		profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
+		profilesupdate.WaitForTuningUpdating(ctx, prof)
 
 		By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
-		profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
+		profilesupdate.WaitForTuningUpdated(ctx, prof)
 
 		// delete the machine config pool
 		Expect(testclient.Client.Delete(ctx, mc)).To(Succeed())
@@ -191,6 +249,81 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
 			})
 		})
 	})
+
+	Context("Runtime Tests", func() {
+		var (
+			targetNodeName    string      // pick one node up front to simplify our testing - e.g. so we know the expected L3 group size ahead of time
+			targetNodeInfo    MachineData // shortcut. Note: **SHALLOW COPY**
+			targetL3GroupSize int
+
+			testPod *corev1.Pod
+		)
+
+		BeforeEach(func() {
+			targetNodeName = workerRTNodes[0].Name // pick the first worker node (arbitrary choice)
+			var ok bool
+			targetNodeInfo, ok = machineDatas[targetNodeName]
+			Expect(ok).To(BeTrue(), "unknown machine data for node %q", targetNodeName)
+
+			targetL3GroupSize = expectedL3GroupSize(targetNodeInfo)
+			// arbitrary number corresponding to the minimum we need. No known supported hardware has L3 groups this small; they are all much bigger.
+			Expect(targetL3GroupSize).Should(BeNumerically(">", expectedMinL3GroupSize), "L3 Group size too small: %d", targetL3GroupSize)
+		})
+
+		// TODO move to DeferCleanup?
+		AfterEach(func() {
+			if testPod == nil {
+				return
+			}
+			ctx := context.Background()
+			testlog.Infof("deleting pod %q", testPod.Name)
+			deleteTestPod(ctx, testPod)
+		})
+
+		It("should align containers which request less than a L3 group size exclusive CPUs", func(ctx context.Context) {
+			askingCPUs := expectedMinL3GroupSize
+
+			By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
+			testPod = makePod(targetNodeName, askingCPUs)
+			Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())
+
+			By("Waiting for the guaranteed pod to be ready")
+			testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+			Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+
+			logs, err := pods.GetLogs(testclient.K8sClient, testPod)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")
+
+			allocatedCPUs, err := cpuset.Parse(logs)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
+			Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)
+
+			ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
+			Expect(ok).To(BeTrue(), "pod does not have L3-aligned CPUs") // TODO log what?
+		})
+
+		It("cannot align containers which request more than a L3 group size exclusive CPUs", func(ctx context.Context) {
+			askingCPUs := targetL3GroupSize + 2 // TODO: to be really safe we should add SMT level cpus
+
+			By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
+			testPod = makePod(targetNodeName, askingCPUs)
+			Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())
+
+			By("Waiting for the guaranteed pod to be ready")
+			testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+			Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+
+			logs, err := pods.GetLogs(testclient.K8sClient, testPod)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")
+
+			allocatedCPUs, err := cpuset.Parse(logs)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
+			Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)
+
+			ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
+			Expect(ok).To(BeFalse(), "pod exceeds L3 group capacity so it cannot have L3-aligned CPUs") // TODO log what?
+		})
+	})
 })
 
 // create Machine config to create text file required to enable prefer-align-cpus-by-uncorecache policy option
@@ -239,3 +372,137 @@ func addContent(ignitionConfig *igntypes.Config, content []byte, dst string, mod
 		},
 	})
 }
+
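+// MachineFromJSON decodes a JSON document produced by the machineinfo tool into a Machine.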
+func MachineFromJSON(data string) (Machine, error) {
+	ma := Machine{}
+	rd := strings.NewReader(data)
+	err := json.NewDecoder(rd).Decode(&ma)
+	return ma, err
+}
+
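+// isCPUSetLLCAligned reports whether cpus fits entirely within one of the given cache groups,
+// returning the matching CacheInfo when it does.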
+func isCPUSetLLCAligned(infos []CacheInfo, cpus cpuset.CPUSet) (bool, *CacheInfo) {
+	for idx := range infos {
+		info := &infos[idx]
+		if cpus.IsSubsetOf(info.CPUs) {
+			return true, info
+		}
+	}
+	return false, nil
+}
+
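+// computeLLCLayout returns one CacheInfo per level-3 (or higher) cache reported for each NUMA node.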
+func computeLLCLayout(mi Machine) []CacheInfo {
+	ret := []CacheInfo{}
+	for _, node := range mi.Topology.Nodes {
+		for _, cache := range node.Caches {
+			if cache.Level < 3 { // TODO
+				continue
+			}
+			ret = append(ret, CacheInfo{
+				NodeID: node.ID,
+				Level:  int(cache.Level),
+				CPUs:   cpusetFromLogicalProcessors(cache.LogicalProcessors...),
+			})
+		}
+	}
+	return ret
+}
+
+func cpusetFromLogicalProcessors(procs ...uint32) cpuset.CPUSet {
+	cpuList := make([]int, 0, len(procs))
+	for _, proc := range procs {
+		cpuList = append(cpuList, int(proc))
+	}
+	return cpuset.New(cpuList...)
+}
+
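+// expectedL3GroupSize returns the number of CPUs in the first L3 cache group found, or 0 if none exists.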
+func expectedL3GroupSize(md MachineData) int {
+	// TODO: we assume all L3 Groups are equal in size.
+	for idx := range md.Caches {
+		cache := &md.Caches[idx]
+		if cache.Level != 3 {
+			continue
+		}
+		return cache.CPUs.Size()
+	}
+	return 0
+}
+
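+// collectMachineDatas runs the machineinfo tool on each node and returns the decoded data keyed by
+// node name; the boolean is false when the tool could not be executed on a node.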
+func collectMachineDatas(ctx context.Context, nodeList []corev1.Node) (map[string]MachineData, bool, error) {
+	cmd := []string{"/usr/bin/machineinfo"}
+	infos := make(map[string]MachineData)
+	for idx := range nodeList {
+		node := &nodeList[idx]
+		out, err := nodes.ExecCommand(ctx, node, cmd)
+		if err != nil {
+			return infos, false, err
+		}
+
+		info, err := MachineFromJSON(string(out))
+		if err != nil {
+			return infos, true, err
+		}
+
+		infos[node.Name] = MachineData{
+			Info:   info,
+			Caches: computeLLCLayout(info), // precompute
+		}
+	}
+	return infos, true, nil
+}
+
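+// makePod builds a test pod bound to nodeName that prints its effective cpuset and then sleeps;
+// when guaranteedCPUs > 0 it requests that many exclusive CPUs so the pod lands in the guaranteed QoS class.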
+func makePod(nodeName string, guaranteedCPUs int) *corev1.Pod {
+	testPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "test-",
+			Labels: map[string]string{
+				"test": "",
+			},
+			Namespace: testutils.NamespaceTesting,
+		},
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{
+				{
+					Name:  "test",
+					Image: images.Test(),
+					Command: []string{
+						"/bin/sh", "-c", "cat /sys/fs/cgroup/cpuset.cpus.effective && sleep 10h",
+					},
+				},
+			},
+			NodeName: nodeName,
+			NodeSelector: map[string]string{
+				testutils.LabelHostname: nodeName,
+			},
+		},
+	}
+	if guaranteedCPUs > 0 {
+		testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				corev1.ResourceCPU:    *resource.NewQuantity(int64(guaranteedCPUs), resource.DecimalSI),
+				corev1.ResourceMemory: resource.MustParse("256Mi"),
+			},
+		}
+	}
+	profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
+	runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
+	testPod.Spec.RuntimeClassName = &runtimeClass
+	return testPod
+}
+
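+// deleteTestPod deletes the pod if it still exists and waits for the deletion to complete;
+// it returns false when the pod was already gone.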
+func deleteTestPod(ctx context.Context, testpod *corev1.Pod) bool {
+	GinkgoHelper()
+
+	// it is possible that the pod was already deleted as part of the test; in that case we want to skip teardown
+	err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
+	if apierrors.IsNotFound(err) {
+		return false
+	}
+
+	err = testclient.DataPlaneClient.Delete(ctx, testpod)
+	Expect(err).ToNot(HaveOccurred())
+
+	err = pods.WaitForDeletion(ctx, testpod, pods.DefaultDeletionTimeout*time.Second)
+	Expect(err).ToNot(HaveOccurred())
+
+	return true
+}