@@ -436,12 +436,12 @@ end
436
436
437
437
438
438
#
439
- # Check gear processes belong to cgroups
439
+ # Check gear threads belong to cgroups
440
440
#
441
- def check_cgroup_procs
442
- verbose("checking cgroups processes ")
441
+ def check_cgroup_tasks
442
+ verbose("checking cgroups tasks ")
443
443
444
- ### Gather current procs running ###
444
+ ### Gather current threads running ###
445
445
min_uid = $CONF.get('GEAR_MIN_UID').to_i
446
446
max_uid = $CONF.get('GEAR_MAX_UID').to_i
447
447
@@ -455,10 +455,11 @@ def check_cgroup_procs
455
455
456
456
verbose("determining node uid range: #{min_uid} to #{max_uid}")
457
457
458
- all_user_procs = %x[/bin/ps -e -o uid,pid].split("\n")
459
- ps_procs = Hash.new{|hash, key| hash[key] = Array.new}
460
- all_user_procs.each do |line|
461
- uid,pid = line.split[0,2]
458
+ # Bug 1067107 - Check tids against tasks instead of pids against cgroup procs
459
+ all_user_threads = %x[/bin/ps -e H -o uid,pid,tid].split("\n")
460
+ ps_threads = Hash.new{|hash, key| hash[key] = Hash.new}
461
+ all_user_threads.each do |line|
462
+ uid,pid,tid = line.split[0,3]
462
463
uid = uid.to_i
463
464
464
465
if uid.between?(min_uid, max_uid)
@@ -470,34 +471,34 @@ def check_cgroup_procs
470
471
end
471
472
472
473
uname = passwd_lines[0].name
473
- ps_procs[uname] += [pid]
474
- ps_procs[uname].uniq!
474
+ ps_threads[uname][tid] = pid
475
475
end
476
476
end
477
477
478
- ### Gather cgroup procs ###
479
- cgroup_procs = Hash.new{|hash, key| hash[key] = Hash.new{|hash, key| hash[key] = Array.new}}
478
+ ### Gather cgroup tasks ###
479
+ tasks = Hash.new{|hash, key| hash[key] = Hash.new{|hash, key| hash[key] = Array.new}}
480
480
481
481
# Support mounting cgroup controllers under /cgroup/all or
482
482
# /cgroup/<controller>
483
- Dir.glob("/cgroup/*/openshift/*/cgroup.procs ").each do |file|
483
+ Dir.glob("/cgroup/*/openshift/*/tasks ").each do |file|
484
484
_, _, controller, _, uuid, _ = file.split("/")
485
485
lines = []
486
486
IO.foreach(file).each { |line| lines << line.strip }
487
- cgroup_procs [controller][uuid] = lines
487
+ tasks [controller][uuid] = lines
488
488
end
489
489
490
490
### Compare ###
491
- ps_procs .each do |uuid,procs |
492
- cgroup_procs .each do |controller,controller_procs |
493
- missing = procs - controller_procs [uuid]
494
- missing.each do |pid |
495
- # ensure the process is still running and not defunct before failing
496
- # this fixes both the transient process and the defunct process
491
+ ps_threads .each do |uuid,threads |
492
+ tasks .each do |controller,controller_tasks |
493
+ missing = threads.keys - controller_tasks [uuid]
494
+ missing.each do |tid |
495
+ # ensure the thread is still running and not defunct before failing
496
+ # this fixes both the transient thread and the defunct thread
497
497
# detection problems
498
498
begin
499
- if File.read("/proc/#{pid}/status") !~ /^State:\s+Z/
500
- user_fail(uuid, "#{uuid} has a process missing from cgroups: #{pid} cgroups controller: #{controller}")
499
+ pid = threads[tid]
500
+ if File.read("/proc/#{pid}/task/#{tid}/status") !~ /^State:\s+Z/
501
+ user_fail(uuid, "#{uuid} has a thread: tid:#{tid}, pid:#{pid} missing from cgroups controller: #{controller}")
501
502
end
502
503
rescue Errno::ENOENT
503
504
end
@@ -970,7 +971,7 @@ if __FILE__ == $0
970
971
check_service_contexts
971
972
check_semaphores
972
973
check_cgroup_config
973
- check_cgroup_procs
974
+ check_cgroup_tasks
974
975
check_tc_config
975
976
check_quotas
976
977
check_users
0 commit comments