@@ -78,7 +78,9 @@ use sscanf::sscanf;
78
78
use std:: collections:: BTreeMap ;
79
79
use std:: path:: Path ;
80
80
use std:: path:: PathBuf ;
81
- use std:: sync:: Arc ;
81
+ use std:: sync:: { Arc , Mutex } ;
82
+ use std:: sync:: atomic:: { AtomicU32 , Ordering } ;
83
+ use std:: time:: { Duration , Instant } ;
82
84
83
85
#[ cfg( feature = "gpu-topology" ) ]
84
86
use crate :: gpu:: { create_gpus, Gpu , GpuIndex } ;
@@ -99,6 +101,9 @@ lazy_static::lazy_static! {
99
101
/// disabled CPUs that may not be onlined, whose IDs are lower than the
100
102
/// IDs of other CPUs that may be onlined.
101
103
pub static ref NR_CPUS_POSSIBLE : usize = libbpf_rs:: num_possible_cpus( ) . unwrap( ) ;
104
+
105
+ /// Whether AMD preferred core ranking is enabled on this system
106
+ pub static ref HAS_PREF_RANK : bool = has_pref_rank( ) ;
102
107
}
103
108
104
109
#[ derive( Debug , Clone , Eq , Hash , Ord , PartialEq , PartialOrd ) ]
@@ -107,7 +112,7 @@ pub enum CoreType {
107
112
Little ,
108
113
}
109
114
110
- #[ derive( Debug , Clone , Eq , Hash , Ord , PartialEq , PartialOrd ) ]
115
+ #[ derive( Debug ) ]
111
116
pub struct Cpu {
112
117
pub id : usize ,
113
118
pub min_freq : usize ,
@@ -126,6 +131,105 @@ pub struct Cpu {
126
131
pub node_id : usize ,
127
132
pub package_id : usize ,
128
133
pub cluster_id : usize ,
134
+ rank : AtomicU32 ,
135
+ }
136
+
137
+ impl Clone for Cpu {
138
+ fn clone ( & self ) -> Self {
139
+ Cpu {
140
+ id : self . id ,
141
+ min_freq : self . min_freq ,
142
+ max_freq : self . max_freq ,
143
+ base_freq : self . base_freq ,
144
+ trans_lat_ns : self . trans_lat_ns ,
145
+ l2_id : self . l2_id ,
146
+ l3_id : self . l3_id ,
147
+ core_type : self . core_type . clone ( ) ,
148
+ core_id : self . core_id ,
149
+ llc_id : self . llc_id ,
150
+ node_id : self . node_id ,
151
+ package_id : self . package_id ,
152
+ cluster_id : self . cluster_id ,
153
+ rank : AtomicU32 :: new ( self . rank . load ( Ordering :: Relaxed ) ) ,
154
+ }
155
+ }
156
+ }
157
+
158
+ impl PartialEq for Cpu {
159
+ fn eq ( & self , other : & Self ) -> bool {
160
+ self . id == other. id
161
+ && self . min_freq == other. min_freq
162
+ && self . max_freq == other. max_freq
163
+ && self . base_freq == other. base_freq
164
+ && self . trans_lat_ns == other. trans_lat_ns
165
+ && self . l2_id == other. l2_id
166
+ && self . l3_id == other. l3_id
167
+ && self . core_type == other. core_type
168
+ && self . core_id == other. core_id
169
+ && self . llc_id == other. llc_id
170
+ && self . node_id == other. node_id
171
+ && self . package_id == other. package_id
172
+ && self . cluster_id == other. cluster_id
173
+ && self . rank ( ) == other. rank ( )
174
+ }
175
+ }
176
+
177
/// Marker impl: `PartialEq` for `Cpu` compares concrete field values
/// (including the loaded rank), which is reflexive, so the `Eq`
/// contract holds.
impl Eq for Cpu { }
178
+
179
+ impl PartialOrd for Cpu {
180
+ fn partial_cmp ( & self , other : & Self ) -> Option < std:: cmp:: Ordering > {
181
+ Some ( self . cmp ( other) )
182
+ }
183
+ }
184
+
185
+ impl Ord for Cpu {
186
+ fn cmp ( & self , other : & Self ) -> std:: cmp:: Ordering {
187
+ self . id . cmp ( & other. id )
188
+ . then_with ( || self . min_freq . cmp ( & other. min_freq ) )
189
+ . then_with ( || self . max_freq . cmp ( & other. max_freq ) )
190
+ . then_with ( || self . base_freq . cmp ( & other. base_freq ) )
191
+ . then_with ( || self . trans_lat_ns . cmp ( & other. trans_lat_ns ) )
192
+ . then_with ( || self . l2_id . cmp ( & other. l2_id ) )
193
+ . then_with ( || self . l3_id . cmp ( & other. l3_id ) )
194
+ . then_with ( || self . core_type . cmp ( & other. core_type ) )
195
+ . then_with ( || self . core_id . cmp ( & other. core_id ) )
196
+ . then_with ( || self . llc_id . cmp ( & other. llc_id ) )
197
+ . then_with ( || self . node_id . cmp ( & other. node_id ) )
198
+ . then_with ( || self . package_id . cmp ( & other. package_id ) )
199
+ . then_with ( || self . cluster_id . cmp ( & other. cluster_id ) )
200
+ . then_with ( || self . rank ( ) . cmp ( & other. rank ( ) ) )
201
+ }
202
+ }
203
+
204
+ impl std:: hash:: Hash for Cpu {
205
+ fn hash < H : std:: hash:: Hasher > ( & self , state : & mut H ) {
206
+ self . id . hash ( state) ;
207
+ self . min_freq . hash ( state) ;
208
+ self . max_freq . hash ( state) ;
209
+ self . base_freq . hash ( state) ;
210
+ self . trans_lat_ns . hash ( state) ;
211
+ self . l2_id . hash ( state) ;
212
+ self . l3_id . hash ( state) ;
213
+ self . core_type . hash ( state) ;
214
+ self . core_id . hash ( state) ;
215
+ self . llc_id . hash ( state) ;
216
+ self . node_id . hash ( state) ;
217
+ self . package_id . hash ( state) ;
218
+ self . cluster_id . hash ( state) ;
219
+ self . rank ( ) . hash ( state) ;
220
+ }
221
+ }
222
+
223
+ impl Cpu {
224
+ /// Get the current rank value
225
+ pub fn rank ( & self ) -> usize {
226
+ self . rank . load ( Ordering :: Relaxed ) as usize
227
+ }
228
+
229
+ /// Set the rank value
230
+ pub fn set_rank ( & self , rank : usize ) {
231
+ self . rank . store ( rank as u32 , Ordering :: Relaxed ) ;
232
+ }
129
233
}
130
234
131
235
#[ derive( Clone , Debug , Eq , Ord , PartialEq , PartialOrd ) ]
@@ -187,6 +291,36 @@ pub struct Topology {
187
291
pub all_llcs : BTreeMap < usize , Arc < Llc > > ,
188
292
pub all_cores : BTreeMap < usize , Arc < Core > > ,
189
293
pub all_cpus : BTreeMap < usize , Arc < Cpu > > ,
294
+
295
+ /// Cached list of ranked CPUs
296
+ ranked_cpus : Mutex < Arc < RankedCpuCache > > ,
297
+ }
298
+
299
/// How long a computed ranking snapshot stays fresh before it is re-read
/// from sysfs.
const RANKED_CPU_CACHE_DURATION: Duration = Duration::from_secs(10);

/// Snapshot of the CPUs ordered by their preferred-core ranking.
#[derive(Clone, Debug)]
pub struct RankedCpuCache {
    /// CPU IDs sorted from highest to lowest ranking.
    pub cpu_ids: Vec<usize>,
    /// Timestamp of the last refresh of this snapshot.
    pub last_updated: Instant,
    /// Generation number that increments each time the order changes.
    pub generation: u64,
}
311
+
312
+ impl RankedCpuCache {
313
+ pub fn new ( ) -> Self {
314
+ Self {
315
+ cpu_ids : Vec :: new ( ) ,
316
+ last_updated : Instant :: now ( ) - RANKED_CPU_CACHE_DURATION ,
317
+ generation : 0 ,
318
+ }
319
+ }
320
+
321
+ pub fn is_valid ( & self ) -> bool {
322
+ self . last_updated . elapsed ( ) < RANKED_CPU_CACHE_DURATION
323
+ }
190
324
}
191
325
192
326
impl Topology {
@@ -242,6 +376,7 @@ impl Topology {
242
376
all_llcs : topo_llcs,
243
377
all_cores : topo_cores,
244
378
all_cpus : topo_cpus,
379
+ ranked_cpus : Mutex :: new ( Arc :: new ( RankedCpuCache :: new ( ) ) ) ,
245
380
} )
246
381
}
247
382
@@ -309,6 +444,68 @@ impl Topology {
309
444
}
310
445
sibling_cpu
311
446
}
447
+
448
+ /// Returns true if cpu_a has a higher rank than cpu_b.
449
+ /// If ranking is not enabled or either CPU is invalid, returns false.
450
+ pub fn is_higher_ranked ( & self , cpu_a : usize , cpu_b : usize ) -> bool {
451
+ if !* HAS_PREF_RANK {
452
+ return false ;
453
+ }
454
+
455
+ let cpu_a_rank = self . all_cpus . get ( & cpu_a) . map ( |cpu| cpu. rank ( ) ) ;
456
+ let cpu_b_rank = self . all_cpus . get ( & cpu_b) . map ( |cpu| cpu. rank ( ) ) ;
457
+
458
+ match ( cpu_a_rank, cpu_b_rank) {
459
+ ( Some ( rank_a) , Some ( rank_b) ) => rank_a > rank_b,
460
+ _ => false ,
461
+ }
462
+ }
463
+
464
+ /// Returns the cached ranked CPU list.
465
+ /// The list is cached internally and refreshed every 10 seconds.
466
+ /// If preferred core ranking is not enabled, returns an empty cache.
467
+ pub fn get_ranked_cpus ( & self ) -> Arc < RankedCpuCache > {
468
+ if !* HAS_PREF_RANK {
469
+ return Arc :: new ( RankedCpuCache {
470
+ cpu_ids : Vec :: new ( ) ,
471
+ last_updated : Instant :: now ( ) ,
472
+ generation : 0 ,
473
+ } ) ;
474
+ }
475
+
476
+ let mut cache = self . ranked_cpus . lock ( ) . unwrap ( ) ;
477
+ if !cache. is_valid ( ) {
478
+ let mut cpu_ranks: Vec < ( usize , usize ) > = Vec :: new ( ) ;
479
+
480
+ for & cpu_id in self . all_cpus . keys ( ) {
481
+ let cpu_path = Path :: new ( "/sys/devices/system/cpu" )
482
+ . join ( format ! ( "cpu{}" , cpu_id) )
483
+ . join ( "cpufreq" ) ;
484
+
485
+ if let Ok ( rank) = read_file_usize ( & cpu_path. join ( "amd_pstate_prefcore_ranking" ) ) {
486
+ // Update the rank directly in the CPU object
487
+ if let Some ( cpu) = self . all_cpus . get ( & cpu_id) {
488
+ cpu. set_rank ( rank) ;
489
+ }
490
+ cpu_ranks. push ( ( cpu_id, rank) ) ;
491
+ }
492
+ }
493
+
494
+ cpu_ranks. sort_by ( |a, b| {
495
+ let a_val = a. 1 ;
496
+ let b_val = b. 1 ;
497
+ b_val. cmp ( & a_val) . then_with ( || a. 0 . cmp ( & b. 0 ) )
498
+ } ) ;
499
+
500
+ let inner = Arc :: make_mut ( & mut * cache) ;
501
+ inner. cpu_ids . clear ( ) ;
502
+ inner. cpu_ids . extend ( cpu_ranks. iter ( ) . map ( |( id, _) | * id) ) ;
503
+ inner. last_updated = Instant :: now ( ) ;
504
+ inner. generation += 1 ;
505
+ }
506
+
507
+ Arc :: clone ( & cache)
508
+ }
312
509
}
313
510
314
511
/******************************************************
@@ -517,6 +714,7 @@ fn create_insert_cpu(
517
714
node_id : node. id ,
518
715
package_id,
519
716
cluster_id,
717
+ rank : AtomicU32 :: new ( 0 ) ,
520
718
} ) ,
521
719
) ;
522
720
@@ -704,3 +902,13 @@ fn create_numa_nodes(
704
902
}
705
903
Ok ( nodes)
706
904
}
905
+
906
/// Detects whether the AMD P-State preferred-core facility is enabled by
/// reading `/sys/devices/system/cpu/amd_pstate/prefcore`.
///
/// Any read failure (file missing, permission denied, I/O error) is
/// treated as "not enabled". Reading directly — instead of an `exists()`
/// probe followed by a read — avoids a TOCTOU window and one redundant
/// stat syscall.
fn has_pref_rank() -> bool {
    match std::fs::read_to_string("/sys/devices/system/cpu/amd_pstate/prefcore") {
        Ok(contents) => contents.trim() == "enabled",
        Err(_) => false,
    }
}
0 commit comments