Message-ID: <CABeRdtp9ERutdeKLpztyLpV=8VOqiJPnES_3c_V4aLS_dZ_-jg@mail.gmail.com>
Date: Sat, 4 Feb 2017 01:42:56 +0900
From: Hoeun Ryu <hoeun.ryu@...il.com>
To: Michal Hocko <mhocko@...nel.org>
Cc: Andrew Morton <akpm@...ux-foundation.org>, Ingo Molnar <mingo@...nel.org>,
	Andy Lutomirski <luto@...nel.org>, Kees Cook <keescook@...omium.org>,
	"Eric W. Biederman" <ebiederm@...ssion.com>,
	Mateusz Guzik <mguzik@...hat.com>, linux-kernel@...r.kernel.org,
	kernel-hardening@...ts.openwall.com
Subject: Re: [PATCH 1/3] fork: dynamically allocate cache array for vmapped
 stacks using cpuhp

On Sat, Feb 4, 2017 at 12:39 AM, Michal Hocko <mhocko@...nel.org> wrote:
> On Sat 04-02-17 00:30:05, Hoeun Ryu wrote:
>> Using virtually mapped stacks, kernel stacks are allocated via vmalloc.
>> In the current implementation, two stacks per CPU can be cached when
>> tasks are freed, and the cached stacks are reused in task duplication,
>> but the array for the cached stacks is statically allocated by the
>> per-cpu API. In this new implementation, the array for the cached
>> stacks is dynamically allocated and freed by CPU hotplug callbacks,
>> and the cached stacks are freed when a CPU goes down. The CPU hotplug
>> setup is established in fork_init().
>
> Why do we want this? I can see that the follow-up patch makes the number
> configurable but the changelog doesn't describe the motivation for that.
> Which workload would benefit from a higher value?
>

The key difference in this implementation is that the cached stacks for
a CPU are freed when that CPU goes down, so the cached stacks are no
longer wasted. In the current implementation, the cached stacks for a
CPU remain on the system even after the CPU goes down. On a machine
with many CPUs, one may also want a bigger per-CPU stack cache, which
is what the follow-up patch makes configurable. (A minimal sketch of
the cpuhp pattern this relies on follows after the quoted patch below.)

>> Signed-off-by: Hoeun Ryu <hoeun.ryu@...il.com>
>> ---
>>  kernel/fork.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++-------------
>>  1 file changed, 64 insertions(+), 17 deletions(-)
>>
>> diff --git a/kernel/fork.c b/kernel/fork.c
>> index 61284d8..54421a9 100644
>> --- a/kernel/fork.c
>> +++ b/kernel/fork.c
>> @@ -167,26 +167,71 @@ void __weak arch_release_thread_stack(unsigned long *stack)
>>   * flush. Try to minimize the number of calls by caching stacks.
>>   */
>>  #define NR_CACHED_STACKS 2
>> -static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
>> +
>> +struct vm_stack_cache {
>> +	struct vm_struct **vm_stacks;
>> +	int nr;
>> +	int cur;
>> +};
>> +
>> +static DEFINE_PER_CPU(struct vm_stack_cache, vm_stacks);
>> +
>> +static int alloc_vm_stack_cache(unsigned int cpu)
>> +{
>> +	struct vm_stack_cache *vm_stack_cache = &per_cpu(vm_stacks, cpu);
>> +	struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
>> +	int i;
>> +
>> +	/* if free_vm_stack_cache() didn't free it */
>> +	if (!vm_stacks) {
>> +		vm_stacks =
>> +			vzalloc(sizeof(struct vm_struct *) * NR_CACHED_STACKS);
>> +		if (!vm_stacks)
>> +			return -ENOMEM;
>> +	}
>> +
>> +	vm_stack_cache->vm_stacks = vm_stacks;
>> +	vm_stack_cache->cur = 0;
>> +	vm_stack_cache->nr = 0;
>> +
>> +	return 0;
>> +}
>> +
>> +static int free_vm_stack_cache(unsigned int cpu)
>> +{
>> +	struct vm_stack_cache *vm_stack_cache = &per_cpu(vm_stacks, cpu);
>> +	struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
>> +	int i;
>> +
>> +	for (i = 0; i < vm_stack_cache->nr; i++) {
>> +		vfree(vm_stacks[i]->addr);
>> +		vm_stacks[i] = NULL;
>> +	}
>> +
>> +	vm_stack_cache->nr = 0;
>> +	vm_stack_cache->cur = 0;
>> +	/* do not free vm_stack[cpu]->vm_stacks itself, reused in allocation */
>> +
>> +	return 0;
>> +}
>> +
>>  #endif
>>
>>  static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
>>  {
>>  #ifdef CONFIG_VMAP_STACK
>> +	struct vm_stack_cache *vm_stack_cache =
>> +		&per_cpu(vm_stacks, smp_processor_id());
>> +	struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
>>  	void *stack;
>> -	int i;
>>
>>  	local_irq_disable();
>> -	for (i = 0; i < NR_CACHED_STACKS; i++) {
>> -		struct vm_struct *s = this_cpu_read(cached_stacks[i]);
>> -
>> -		if (!s)
>> -			continue;
>> -		this_cpu_write(cached_stacks[i], NULL);
>> -
>> -		tsk->stack_vm_area = s;
>> +	if (vm_stack_cache->cur > 0) {
>> +		struct vm_struct *vm_stack = vm_stacks[--vm_stack_cache->cur];
>> +		tsk->stack_vm_area = vm_stack;
>>  		local_irq_enable();
>> -		return s->addr;
>> +
>> +		return vm_stack->addr;
>>  	}
>>  	local_irq_enable();
>>
>> @@ -216,15 +261,14 @@ static inline void free_thread_stack(struct task_struct *tsk)
>>  {
>>  #ifdef CONFIG_VMAP_STACK
>>  	if (task_stack_vm_area(tsk)) {
>> +		struct vm_stack_cache *vm_stack_cache =
>> +			&per_cpu(vm_stacks, smp_processor_id());
>> +		struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
>>  		unsigned long flags;
>> -		int i;
>>
>>  		local_irq_save(flags);
>> -		for (i = 0; i < NR_CACHED_STACKS; i++) {
>> -			if (this_cpu_read(cached_stacks[i]))
>> -				continue;
>> -
>> -			this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
>> +		if (vm_stack_cache->cur < vm_stack_cache->nr) {
>> +			vm_stacks[vm_stack_cache->cur++] = tsk->stack_vm_area;
>>  			local_irq_restore(flags);
>>  			return;
>>  		}
>> @@ -456,6 +500,9 @@ void __init fork_init(void)
>>  	for (i = 0; i < UCOUNT_COUNTS; i++) {
>>  		init_user_ns.ucount_max[i] = max_threads/2;
>>  	}
>> +
>> +	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "vm_stack_cache",
>> +			  alloc_vm_stack_cache, free_vm_stack_cache);
>>  }
>>
>>  int __weak arch_dup_task_struct(struct task_struct *dst,
>> --
>> 2.7.4
>>
>
> --
> Michal Hocko
> SUSE Labs
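
For reference, here is a minimal, self-contained sketch of the cpuhp
pattern the patch builds on. This is my own illustration, not part of
the patch; the demo_* names and the "fork:demo_cache" state name are
made up. With CPUHP_AP_ONLINE_DYN, cpuhp_setup_state() dynamically
allocates a hotplug state, invokes the startup callback for every CPU
already online at registration time and for each CPU that comes online
later, and invokes the teardown callback as a CPU goes down:

#include <linux/cpuhotplug.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/vmalloc.h>

/* One illustrative per-cpu slot, standing in for the real cache array. */
static DEFINE_PER_CPU(void *, demo_cache);

/* Startup callback: allocate the per-cpu resource as @cpu comes up. */
static int demo_cpu_online(unsigned int cpu)
{
	void *p = vzalloc(PAGE_SIZE);

	if (!p)
		return -ENOMEM;
	per_cpu(demo_cache, cpu) = p;
	return 0;
}

/* Teardown callback: release the resource again as @cpu goes down. */
static int demo_cpu_offline(unsigned int cpu)
{
	vfree(per_cpu(demo_cache, cpu));
	per_cpu(demo_cache, cpu) = NULL;
	return 0;
}

static int __init demo_init(void)
{
	/*
	 * For CPUHP_AP_ONLINE_DYN this returns the dynamically
	 * allocated state number (positive) on success or a negative
	 * errno; keep the number around and pass it to
	 * cpuhp_remove_state() if the registration must be undone.
	 */
	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fork:demo_cache",
				    demo_cpu_online, demo_cpu_offline);

	return ret < 0 ? ret : 0;
}
module_init(demo_init);
MODULE_LICENSE("GPL");

The teardown callback is the natural hook for releasing per-cpu
resources, which is why the patch can drop the cached stacks in
free_vm_stack_cache() as soon as a CPU goes offline.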