Skip to content

V6.2 timerslack+cgroups #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion fs/proc/base.c
Original file line number Diff line number Diff line change
Expand Up @@ -2611,7 +2611,7 @@ static int timerslack_ns_show(struct seq_file *m, void *v)
}

task_lock(p);
seq_printf(m, "%llu\n", p->timer_slack_ns);
seq_printf(m, "%llu\n", get_task_timer_slack_ns(p));
task_unlock(p);

out:
Expand Down
7 changes: 4 additions & 3 deletions fs/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ static long __estimate_accuracy(struct timespec64 *tv)

u64 select_estimate_accuracy(struct timespec64 *tv)
{
u64 ret;
u64 ret, timer_slack;
struct timespec64 now;

/*
Expand All @@ -88,8 +88,9 @@ u64 select_estimate_accuracy(struct timespec64 *tv)
ktime_get_ts64(&now);
now = timespec64_sub(*tv, now);
ret = __estimate_accuracy(&now);
if (ret < current->timer_slack_ns)
return current->timer_slack_ns;
timer_slack = get_task_timer_slack_ns(current);
if (ret < timer_slack)
return timer_slack;
return ret;
}

Expand Down
9 changes: 9 additions & 0 deletions include/linux/cgroup-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,15 @@ struct cgroup {
int nr_dying_descendants;
int max_descendants;

/*
* The default process timer slacks:
* Setting timer_slack_ns sets the slack of this cgroup (and its ancestry)
* to that value. Defaults to U64_MAX when unset. When timer_slack_ns is
* unset, the parent timer slack from default_timer_slack_ns is used.
*/
u64 timer_slack_ns;
u64 default_timer_slack_ns;

/*
* Each non-empty css_set associated with this cgroup contributes
* one to nr_populated_csets. The counter is zero iff this cgroup
Expand Down
11 changes: 11 additions & 0 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *parent);
struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *css);
struct cgroup_subsys_state *
css_filter_next_descendant_pre(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *root,
bool (*filter)(struct cgroup_subsys_state *pos, void *data),
void *filter_data);

struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos);
struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *css);
Expand Down Expand Up @@ -243,6 +249,11 @@ void css_task_iter_end(struct css_task_iter *it);
for ((pos) = css_next_descendant_pre(NULL, (css)); (pos); \
(pos) = css_next_descendant_pre((pos), (css)))

/**
 * css_filter_for_each_descendant_pre - filtered walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @css: css passed as the root argument of css_filter_next_descendant_pre()
 * @filter: callback forwarded to css_filter_next_descendant_pre()
 * @filter_data: opaque pointer forwarded alongside @filter
 *
 * Iterates by calling css_filter_next_descendant_pre() repeatedly, starting
 * from a NULL position, until it returns NULL.
 *
 * NOTE(review): how @filter influences the walk (for example whether a
 * rejected css also prunes its subtree) is decided inside
 * css_filter_next_descendant_pre(), which is not visible here - confirm
 * before relying on it.
 */
#define css_filter_for_each_descendant_pre(pos, css, filter, filter_data) \
for ((pos) = css_filter_next_descendant_pre(NULL, (css), (filter), (filter_data)); \
(pos); \
(pos) = css_filter_next_descendant_pre((pos), (css), (filter), (filter_data)))

/**
* css_for_each_descendant_post - post-order walk of a css's descendants
* @pos: the css * to use as the loop cursor
Expand Down
78 changes: 54 additions & 24 deletions include/linux/hrtimer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#define _LINUX_HRTIMER_H

#include <linux/hrtimer_defs.h>
#include <linux/rbtree.h>
#include <linux/rbtree_augmented.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/percpu.h>
Expand Down Expand Up @@ -97,14 +97,15 @@ enum hrtimer_restart {
/**
* struct hrtimer - the basic hrtimer structure
* @node: timerqueue node, which also manages node.expires,
* the absolute expiry time in the hrtimers internal
* the earliest expiry time in the hrtimers internal
* representation. The time is related to the clock on
* which the timer is based. Is setup by adding
* slack to the _softexpires value. For non range timers
* identical to _softexpires.
* @_softexpires: the absolute earliest expiry time of the hrtimer.
* The time which was given as expiry time when the timer
* was armed.
* which the timer is based.
* @_hardexpires: The absolutely last time this timer should expire.
* This is the timer expiry time with the timer slack added to it.
* For non range timers identical to node.expires.
* @_subtree_least_expires: The least hard expiry time among all the nodes in
* the subtree from this node, i.e. when the next timer should
* fire.
* @function: timer expiry callback function
* @base: pointer to the timer base (per cpu and per clock)
* @state: state information (See bit values above)
Expand All @@ -117,7 +118,8 @@ enum hrtimer_restart {
*/
struct hrtimer {
struct timerqueue_node node;
ktime_t _softexpires;
ktime_t _hardexpires;
ktime_t _subtree_least_expires;
enum hrtimer_restart (*function)(struct hrtimer *);
struct hrtimer_clock_base *base;
u8 state;
Expand Down Expand Up @@ -240,66 +242,88 @@ struct hrtimer_cpu_base {
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
timer->node.expires = time;
timer->_softexpires = time;
timer->_hardexpires = time;
}

static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta)
{
timer->_softexpires = time;
timer->node.expires = ktime_add_safe(time, delta);
timer->node.expires = time;
timer->_hardexpires = ktime_add_safe(time, delta);
}

static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, u64 delta)
{
timer->_softexpires = time;
timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta));
timer->node.expires = time;
timer->_hardexpires = ktime_add_safe(time, ns_to_ktime(delta));
}


static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
{
timer->node.expires = tv64;
timer->_softexpires = tv64;
timer->_hardexpires = tv64;
}

/*
 * Record @time as the cached least hard expiry of the rbtree subtree
 * rooted at @timer. Only the _subtree_least_expires field is written.
 */
static inline void
hrtimer_set_subtree_least_expires(struct hrtimer *timer, ktime_t time)
{
	timer->_subtree_least_expires = time;
}

/*
 * Raw s64 variant of the subtree-least-expires setter: stores @tv64
 * directly into @timer's _subtree_least_expires field.
 */
static inline void
hrtimer_set_subtree_least_expires_tv64(struct hrtimer *timer, s64 tv64)
{
	timer->_subtree_least_expires = tv64;
}

static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
{
timer->node.expires = ktime_add_safe(timer->node.expires, time);
timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
timer->_hardexpires = ktime_add_safe(timer->_hardexpires, time);
}

static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns)
{
timer->node.expires = ktime_add_ns(timer->node.expires, ns);
timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
timer->_hardexpires = ktime_add_ns(timer->_hardexpires, ns);
}

static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
{
return timer->node.expires;
return timer->_hardexpires;
}

static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
{
return timer->_softexpires;
return timer->node.expires;
}

static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
{
return timer->node.expires;
return timer->_hardexpires;
}

/*
 * Return the value cached in @timer's _subtree_least_expires field,
 * as a ktime_t.
 */
static inline ktime_t
hrtimer_get_subtree_least_expires(const struct hrtimer *timer)
{
	return timer->_subtree_least_expires;
}

/*
 * Return the value cached in @timer's _subtree_least_expires field,
 * as a raw s64.
 */
static inline s64
hrtimer_get_subtree_least_expires_tv64(const struct hrtimer *timer)
{
	return timer->_subtree_least_expires;
}

static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
{
return timer->_softexpires;
return timer->node.expires;
}

static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
{
return ktime_to_ns(timer->node.expires);
return ktime_to_ns(timer->_hardexpires);
}

static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
{
return ktime_sub(timer->node.expires, timer->base->get_time());
return ktime_sub(timer->_hardexpires, timer->base->get_time());
}

static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
Expand Down Expand Up @@ -329,7 +353,7 @@ extern unsigned int hrtimer_resolution;
static inline ktime_t
__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
{
ktime_t rem = ktime_sub(timer->node.expires, now);
ktime_t rem = ktime_sub(timer->_hardexpires, now);

/*
* Adjust relative timers for the extra we added in
Expand Down Expand Up @@ -523,6 +547,12 @@ extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
/* Soft interrupt function to run the hrtimer queues: */
extern void hrtimer_run_queues(void);

#ifdef CONFIG_HIGH_RES_TIMERS
extern void hrtimer_run_softexpired_timers(void);
#else
/*
 * No-op stub used when CONFIG_HIGH_RES_TIMERS is not set, so callers
 * need no #ifdef of their own. No trailing ';' after the body: an empty
 * declaration at file scope is not valid ISO C.
 */
static inline void hrtimer_run_softexpired_timers(void) { }
#endif

/* Bootup initialization: */
extern void __init hrtimers_init(void);

Expand Down
51 changes: 51 additions & 0 deletions include/linux/rbtree_augmented.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ struct rb_augment_callbacks {
void (*propagate)(struct rb_node *node, struct rb_node *stop);
void (*copy)(struct rb_node *old, struct rb_node *new);
void (*rotate)(struct rb_node *old, struct rb_node *new);
void (*insert)(struct rb_node *parent, struct rb_node *node);
};

extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
Expand Down Expand Up @@ -60,6 +61,56 @@ rb_insert_augmented_cached(struct rb_node *node,
rb_insert_augmented(node, &root->rb_root, augment);
}

/*
 * __rb_add_augmented - descend from *@link and link @node at a leaf
 * @node: node to link into the tree
 * @link: address of the pointer to start descending from
 * @less: ordering callback; @node goes left when less(@node, parent) is true
 * @augment: optional callbacks; when non-NULL, ->insert(parent, @node) is
 *           called for every node visited on the way down, so ->insert
 *           must be set whenever @augment is supplied
 *
 * Only links the node via rb_link_node(); the caller is expected to follow
 * up with the matching rb_insert_augmented*() call.
 *
 * Returns true when the descent never went right, i.e. @node was linked in
 * the leftmost position.
 */
static __always_inline bool
__rb_add_augmented(struct rb_node *node, struct rb_node **link,
		   bool (*less)(struct rb_node *, const struct rb_node *),
		   const struct rb_augment_callbacks *augment)
{
	struct rb_node *cur;
	struct rb_node *parent = NULL;
	bool went_right = false;

	for (cur = *link; cur; cur = *link) {
		parent = cur;

		if (augment)
			augment->insert(parent, node);

		if (less(node, parent)) {
			link = &parent->rb_left;
			continue;
		}

		link = &parent->rb_right;
		went_right = true;
	}

	rb_link_node(node, parent, link);

	return !went_right;
}

/*
 * rb_add_augmented_cached - add @node to an augmented, leftmost-cached tree
 * @node: node to insert
 * @tree: cached rbtree root to insert into
 * @less: ordering callback handed to __rb_add_augmented()
 * @augment: augmentation callbacks, passed both to __rb_add_augmented() and
 *           to rb_insert_augmented_cached()
 *
 * Returns @node when it was linked as the leftmost node, NULL otherwise.
 */
static __always_inline struct rb_node *
rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree,
			bool (*less)(struct rb_node *, const struct rb_node *),
			const struct rb_augment_callbacks *augment)
{
	const bool is_leftmost =
		__rb_add_augmented(node, &tree->rb_root.rb_node, less, augment);

	rb_insert_augmented_cached(node, tree, is_leftmost, augment);

	if (!is_leftmost)
		return NULL;

	return node;
}

/*
 * rb_add_augmented - add @node to an augmented rbtree
 * @node: node to insert
 * @tree: rbtree root to insert into
 * @less: ordering callback handed to __rb_add_augmented()
 * @augment: augmentation callbacks, passed both to __rb_add_augmented() and
 *           to rb_insert_augmented()
 *
 * Links @node starting from the root pointer, then calls
 * rb_insert_augmented() to complete the insertion. The leftmost result of
 * the link step is not needed here and is discarded.
 */
static __always_inline void
rb_add_augmented(struct rb_node *node, struct rb_root *tree,
		 bool (*less)(struct rb_node *, const struct rb_node *),
		 const struct rb_augment_callbacks *augment)
{
	__rb_add_augmented(node, &tree->rb_node, less, augment);
	rb_insert_augmented(node, tree, augment);
}

/*
* Template for declaring augmented rbtree callbacks (generic case)
*
Expand Down
20 changes: 20 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1346,9 +1346,13 @@ struct task_struct {
/*
* Time slack values; these are used to round up poll() and
* select() etc timeout values. These are in nanoseconds.
* The default timer slack used is 50 usec.
* The effective timer slack should be retrieved with
* get_task_timer_slack_ns(task)
*/
u64 timer_slack_ns;
u64 default_timer_slack_ns;
#define TASK_TIMER_SLACK_NS 50000

#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
unsigned int kasan_depth;
Expand Down Expand Up @@ -2420,4 +2424,20 @@ static inline void sched_core_fork(struct task_struct *p) { }

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

#ifndef CONFIG_CGROUPS
/*
 * Without CONFIG_CGROUPS there is no cgroup-provided slack, so every task
 * gets the TASK_TIMER_SLACK_NS constant; @task is not inspected.
 */
static inline u64 cgroup_timer_slack_ns(const struct task_struct *task)
{
	return TASK_TIMER_SLACK_NS;
}
#else
/* Defined out of line when CONFIG_CGROUPS is enabled. */
extern u64 cgroup_timer_slack_ns(const struct task_struct *task);
#endif

/*
 * get_task_timer_slack_ns - effective timer slack of @task, in nanoseconds
 *
 * A per-task timer_slack_ns of U64_MAX defers to cgroup_timer_slack_ns();
 * any other value is returned as is.
 */
static inline u64 get_task_timer_slack_ns(const struct task_struct *task)
{
	const u64 slack = task->timer_slack_ns;

	return slack == U64_MAX ? cgroup_timer_slack_ns(task) : slack;
}

#endif
30 changes: 28 additions & 2 deletions include/linux/timerqueue.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#ifndef _LINUX_TIMERQUEUE_H
#define _LINUX_TIMERQUEUE_H

#include <linux/rbtree.h>
#include <linux/rbtree_augmented.h>
#include <linux/ktime.h>


Expand All @@ -11,8 +11,15 @@ struct timerqueue_node {
ktime_t expires;
};

/**
 * struct timerqueue_head - timerqueue base
 * @rb_root: cached rbtree root of the timerqueue
 * @augment: If not NULL, contains augmentation callbacks to use when
 *           modifying timerqueue rbtree.
 */
struct timerqueue_head {
struct rb_root_cached rb_root;
const struct rb_augment_callbacks *augment;
};


Expand Down Expand Up @@ -53,8 +60,27 @@ static inline bool timerqueue_node_expires(struct timerqueue_node *node)
return node->expires;
}

static inline void timerqueue_init_head(struct timerqueue_head *head)
static inline
void timerqueue_init_head_augmented(struct timerqueue_head *head,
const struct rb_augment_callbacks *augment)
{
head->rb_root = RB_ROOT_CACHED;
head->augment = augment;
}

/*
 * timerqueue_init_head - initialise a timerqueue head without augmentation
 * @head: head to initialise
 *
 * Same as timerqueue_init_head_augmented() with NULL callbacks.
 */
static inline void timerqueue_init_head(struct timerqueue_head *head)
{
	timerqueue_init_head_augmented(head, NULL);
}

/*
 * timerqueue_getroot - return the timerqueue node at the root of the rbtree
 * @head: timerqueue head to inspect
 *
 * Returns the timerqueue_node embedding the root rb_node, or NULL when the
 * tree has no root node.
 */
static inline
struct timerqueue_node *timerqueue_getroot(const struct timerqueue_head *head)
{
	struct rb_node *top = head->rb_root.rb_root.rb_node;

	return top ? rb_entry(top, struct timerqueue_node, node) : NULL;
}
#endif /* _LINUX_TIMERQUEUE_H */
Loading