@@ -5,27 +5,40 @@ import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
+	"strings"
+	"time"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 
+	"github.com/jaypipes/ghw/pkg/cpu"
+	"github.com/jaypipes/ghw/pkg/topology"
+
 	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/utils/cpuset"
 	"k8s.io/utils/ptr"
 
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
 	igntypes "github.com/coreos/ignition/v2/config/v3_2/types"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"
 	performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
+	"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
 	profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
 	testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
 
 	testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/images"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
 	testlog "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/log"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/pods"
 
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/poolname"
 	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
@@ -39,9 +52,31 @@ const (
 	fileMode = 0420
 )
 
+const (
+	// arbitrary number corresponding to the minimum we need; no known supported hardware has L3 groups this small, they are all much bigger
+	expectedMinL3GroupSize = 8
+)
+
+type Machine struct {
+	CPU      *cpu.Info      `json:"cpu"`
+	Topology *topology.Info `json:"topology"`
+}
+
+type CacheInfo struct {
+	NodeID int
+	Level  int
+	CPUs   cpuset.CPUSet
+}
+
+type MachineData struct {
+	Info   Machine
+	Caches []CacheInfo
+}
+
 var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.OpenShift)), Ordered, func() {
 	var (
 		workerRTNodes  []corev1.Node
+		machineDatas   map[string]MachineData // nodeName -> MachineData
 		perfProfile    *performancev2.PerformanceProfile
 		performanceMCP string
 		err            error
@@ -59,11 +94,19 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
 		Expect(err).ToNot(HaveOccurred())
 
 		workerRTNodes, err = nodes.MatchingOptionalSelector(workerRTNodes)
-		Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error looking for the optional selector: %v", err))
+		Expect(err).ToNot(HaveOccurred(), "error looking for the optional selector: %v", err)
+
+		if len(workerRTNodes) < 1 {
+			Skip("need at least a worker node")
+		}
 
 		perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
 		Expect(err).ToNot(HaveOccurred())
 
+		By(fmt.Sprintf("collecting machine infos for %d nodes", len(workerRTNodes)))
+		machineDatas, err = collectMachineDatas(ctx, workerRTNodes)
+		Expect(err).ToNot(HaveOccurred())
+
 		performanceMCP, err = mcps.GetByProfile(perfProfile)
 		Expect(err).ToNot(HaveOccurred())
 
@@ -191,6 +234,81 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
 			})
 		})
 	})
+
+	Context("Runtime Tests", func() {
+		var (
+			targetNodeName    string      // pick one node to simplify our testing - e.g. to know ahead of time the expected L3 group size
+			targetNodeInfo    MachineData // shortcut. Note: **SHALLOW COPY**
+			targetL3GroupSize int
+
+			testPod *corev1.Pod
+		)
+
+		BeforeEach(func() {
+			targetNodeName = workerRTNodes[0].Name // arbitrarily pick the first node
+			var ok bool
+			targetNodeInfo, ok = machineDatas[targetNodeName]
+			Expect(ok).To(BeTrue(), "unknown machine data for node %q", targetNodeName)
+
+			targetL3GroupSize = expectedL3GroupSize(targetNodeInfo)
+			// arbitrary number corresponding to the minimum we need; no known supported hardware has L3 groups this small, they are all much bigger
+			Expect(targetL3GroupSize).Should(BeNumerically(">", expectedMinL3GroupSize), "L3 Group size too small: %d", targetL3GroupSize)
+		})
+
+		// TODO move to DeferCleanup?
+		AfterEach(func() {
+			if testPod == nil {
+				return
+			}
+			ctx := context.Background()
+			testlog.Infof("deleting pod %q", testPod.Name)
+			deleteTestPod(ctx, testPod)
+		})
+
+		It("should align containers which request less than a L3 group size exclusive CPUs", func(ctx context.Context) {
+			askingCPUs := expectedMinL3GroupSize
+
+			By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
+			testPod = makePod(targetNodeName, askingCPUs)
+			Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())
+
+			By("Waiting for the guaranteed pod to be ready")
+			testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+			Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+
+			logs, err := pods.GetLogs(testclient.K8sClient, testPod)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")
+
+			allocatedCPUs, err := cpuset.Parse(logs)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
+			Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)
+
+			ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
+			Expect(ok).To(BeTrue(), "pod does not have L3-aligned CPUs") // TODO log what?
+		})
+
+		It("cannot align containers which request more than a L3 group size exclusive CPUs", func(ctx context.Context) {
+			askingCPUs := targetL3GroupSize + 2 // TODO: to be really safe we should add SMT level cpus
+
+			By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
+			testPod = makePod(targetNodeName, askingCPUs)
+			Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())
+
+			By("Waiting for the guaranteed pod to be ready")
+			testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+			Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+
+			logs, err := pods.GetLogs(testclient.K8sClient, testPod)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")
+
+			allocatedCPUs, err := cpuset.Parse(logs)
+			Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
+			Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)
+
+			ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
+			Expect(ok).To(BeFalse(), "pod exceeds L3 group capacity so it cannot have L3-aligned CPUs") // TODO log what?
+		})
+	})
 })
 
 // create Machine config to create text file required to enable prefer-align-cpus-by-uncorecache policy option
// create Machine config to create text file required to enable prefer-align-cpus-by-uncorecache policy option
@@ -239,3 +357,137 @@ func addContent(ignitionConfig *igntypes.Config, content []byte, dst string, mod
 		},
 	})
 }
+
+func MachineFromJSON(data string) (Machine, error) {
+	ma := Machine{}
+	rd := strings.NewReader(data)
+	err := json.NewDecoder(rd).Decode(&ma)
+	return ma, err
+}
+
+func isCPUSetLLCAligned(infos []CacheInfo, cpus cpuset.CPUSet) (bool, *CacheInfo) {
+	for idx := range infos {
+		info := &infos[idx]
+		if cpus.IsSubsetOf(info.CPUs) {
+			return true, info
+		}
+	}
+	return false, nil
+}
+
+func computeLLCLayout(mi Machine) []CacheInfo {
+	ret := []CacheInfo{}
+	for _, node := range mi.Topology.Nodes {
+		for _, cache := range node.Caches {
+			if cache.Level < 3 { // TODO
+				continue
+			}
+			ret = append(ret, CacheInfo{
+				NodeID: node.ID,
+				Level:  int(cache.Level),
+				CPUs:   cpusetFromLogicalProcessors(cache.LogicalProcessors...),
+			})
+		}
+	}
+	return ret
+}
+
+func cpusetFromLogicalProcessors(procs ...uint32) cpuset.CPUSet {
+	cpuList := make([]int, 0, len(procs))
+	for _, proc := range procs {
+		cpuList = append(cpuList, int(proc))
+	}
+	return cpuset.New(cpuList...)
+}
+
+func expectedL3GroupSize(md MachineData) int {
+	// TODO: we assume all L3 Groups are equal in size.
+	for idx := range md.Caches {
+		cache := &md.Caches[idx]
+		if cache.Level != 3 {
+			continue
+		}
+		return cache.CPUs.Size()
+	}
+	return 0
+}
+
+func collectMachineDatas(ctx context.Context, nodeList []corev1.Node) (map[string]MachineData, error) {
+	cmd := []string{"/usr/bin/machineinfo"}
+	infos := make(map[string]MachineData)
+	for idx := range nodeList {
+		node := &nodeList[idx]
+		out, err := nodes.ExecCommand(ctx, node, cmd)
+		if err != nil {
+			return infos, err
+		}
+
+		info, err := MachineFromJSON(string(out))
+		if err != nil {
+			return infos, err
+		}
+
+		infos[node.Name] = MachineData{
+			Info:   info,
+			Caches: computeLLCLayout(info), // precompute
+		}
+	}
+	return infos, nil
+}
+
+func makePod(nodeName string, guaranteedCPUs int) *corev1.Pod {
+	testPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "test-",
+			Labels: map[string]string{
+				"test": "",
+			},
+			Namespace: testutils.NamespaceTesting,
+		},
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{
+				{
+					Name:  "test",
+					Image: images.Test(),
+					Command: []string{
+						"/bin/sh", "-c", "cat /sys/fs/cgroup/cpuset.cpus.effective && sleep 10h",
+					},
+				},
+			},
+			NodeName: nodeName,
+			NodeSelector: map[string]string{
+				testutils.LabelHostname: nodeName,
+			},
+		},
+	}
+	if guaranteedCPUs > 0 {
+		testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				corev1.ResourceCPU:    *resource.NewQuantity(int64(guaranteedCPUs), resource.DecimalSI),
+				corev1.ResourceMemory: resource.MustParse("256Mi"),
+			},
+		}
+	}
+	profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
+	runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
+	testPod.Spec.RuntimeClassName = &runtimeClass
+	return testPod
+}
+
+func deleteTestPod(ctx context.Context, testpod *corev1.Pod) bool {
+	GinkgoHelper()
+
+	// it is possible that the pod was already deleted as part of the test; in this case we want to skip teardown
+	err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
+	if apierrors.IsNotFound(err) {
+		return false
+	}
+
+	err = testclient.DataPlaneClient.Delete(ctx, testpod)
+	Expect(err).ToNot(HaveOccurred())
+
+	err = pods.WaitForDeletion(ctx, testpod, pods.DefaultDeletionTimeout*time.Second)
+	Expect(err).ToNot(HaveOccurred())
+
+	return true
+}