Skip to content

Commit 653097a

Browse files
committed
Fix some hyperthreading errors.
1. When there are multiple NUMA nodes and hyper-threading causes adjacent logical cores to share a physical core (e.g., common -> avail[i] = 0x5555555555555555UL), the numa_mapping function should not filter CPUs with the avail bitmask, because the subsequent local_cpu_map function applies that mask again and the masking would be redundant. 2. In the scenario described above, the final_num_procs field cannot accurately represent the actual number of valid CPU cores. The num_procs field can be used in its place, so final_num_procs has been removed.
1 parent fe220a0 commit 653097a

File tree

1 file changed

+8
-24
lines changed

1 file changed

+8
-24
lines changed

driver/others/init.c

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ typedef struct {
122122

123123
int num_nodes;
124124
int num_procs;
125-
int final_num_procs;
126125
unsigned long avail [MAX_BITMASK_LEN];
127126
int avail_count;
128127
unsigned long cpu_info [MAX_CPUS];
@@ -181,18 +180,6 @@ static inline int popcount(unsigned long number) {
181180
return count;
182181
}
183182

184-
static inline int rcount(unsigned long number) {
185-
186-
int count = -1;
187-
188-
while ((number > 0) && ((number & 0)) == 0) {
189-
count ++;
190-
number >>= 1;
191-
}
192-
193-
return count;
194-
}
195-
196183
/***
197184
Known issue: The number of CPUs/cores should less
198185
than sizeof(unsigned long). On 64 bits, the limit
@@ -391,7 +378,7 @@ static void numa_mapping(void) {
391378
core = 0;
392379
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
393380
bitmask_idx = CPUELT(cpu);
394-
if (common -> node_info[node][bitmask_idx] & common -> avail[bitmask_idx] & CPUMASK(cpu)) {
381+
if (common -> node_info[node][bitmask_idx]) {
395382
common -> cpu_info[count] = WRITE_CORE(core) | WRITE_NODE(node) | WRITE_CPU(cpu);
396383
count ++;
397384
core ++;
@@ -415,7 +402,7 @@ static void numa_mapping(void) {
415402
}
416403
}
417404
for (i = 0; i < MAX_BITMASK_LEN; i++)
418-
cpu_count += popcount(common -> node_info[current_node][i] & common -> avail[i]);
405+
cpu_count += popcount(common -> node_info[current_node][i]);
419406

420407
/*
421408
* If all the processes can be accommodated in the
@@ -570,13 +557,13 @@ static void disable_affinity(void) {
570557
/* }else */
571558
/* lprocmask = (1UL << common -> final_num_procs) - 1; */
572559

573-
bitmask_idx = CPUELT(common -> final_num_procs);
560+
bitmask_idx = CPUELT(common -> num_procs);
574561

575562
for(i=0; i< bitmask_idx; i++){
576563
lprocmask[count++] = 0xFFFFFFFFFFFFFFFFUL;
577564
}
578-
if(CPUMASK(common -> final_num_procs) != 1){
579-
lprocmask[count++] = CPUMASK(common -> final_num_procs) - 1;
565+
if(CPUMASK(common -> num_procs) != 1){
566+
lprocmask[count++] = CPUMASK(common -> num_procs) - 1;
580567
}
581568
lprocmask_count = count;
582569

@@ -731,12 +718,12 @@ static void local_cpu_map(void) {
731718

732719
cpu ++;
733720

734-
} while ((mapping < numprocs) && (cpu < common -> final_num_procs));
721+
} while ((mapping < numprocs) && (cpu < common -> num_procs));
735722

736723
disable_mapping = 0;
737724

738725
if ((mapping < numprocs) || (numprocs == 1)) {
739-
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) {
726+
for (cpu = 0; cpu < common -> num_procs; cpu ++) {
740727
if (common -> cpu_use[cpu] == pshmid) common -> cpu_use[cpu] = 0;
741728
}
742729
disable_mapping = 1;
@@ -930,10 +917,7 @@ void gotoblas_affinity_init(void) {
930917

931918
if (common -> num_nodes > 1) numa_mapping();
932919

933-
common -> final_num_procs = 0;
934-
for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
935-
936-
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;
920+
for (cpu = 0; cpu < common -> num_procs; cpu ++) common -> cpu_use[cpu] = 0;
937921

938922
common -> magic = SH_MAGIC;
939923

0 commit comments

Comments
 (0)