|
Message-ID: <20110804112331.GA2563@albatros> Date: Thu, 4 Aug 2011 15:23:31 +0400 From: Vasiliy Kulikov <segoon@...nwall.com> To: kernel-hardening@...ts.openwall.com Subject: Re: procfs {tid,tgid,attr}_allowed mount options Hi, New version. Cleanups/fixes here and there. It lacks the net/ restriction, but IMO it is already complicated enough (more than 500 new lines). Even a (relatively) simple thing such as net_allowed= requires additional care. I'd like to get at least the process restrictions upstream first; networking will come after that. --- fs/proc/Makefile | 2 +- fs/proc/base.c | 286 +++++++++++++++++++++------------------ fs/proc/base_perms.c | 305 +++++++++++++++++++++++++++++++++++++++++ fs/proc/inode.c | 19 +++ fs/proc/internal.h | 25 ++++ fs/proc/root.c | 121 ++++++++++++++++- fs/proc/task_nommu.c | 2 +- include/linux/pid_namespace.h | 13 ++ include/linux/proc_fs.h | 12 ++ kernel/pid.c | 2 + kernel/pid_namespace.c | 10 ++- kernel/sysctl.c | 1 + 12 files changed, 660 insertions(+), 138 deletions(-) --- diff --git a/fs/proc/Makefile b/fs/proc/Makefile index c1c7293..81020f9 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o + proc_tty.o base_perms.o proc-y += cmdline.o proc-y += consoles.o proc-y += cpuinfo.o diff --git a/fs/proc/base.c b/fs/proc/base.c index fc5bc27..33684f7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -98,40 +98,43 @@ * in /proc for a task before it execs a suid executable.
*/ -struct pid_entry { - char *name; - int len; - mode_t mode; - const struct inode_operations *iop; - const struct file_operations *fop; - union proc_op op; -}; - -#define NOD(NAME, MODE, IOP, FOP, OP) { \ +#define NOD(NAME, MODE, IOP, FOP, OP, PERMS) { \ .name = (NAME), \ .len = sizeof(NAME) - 1, \ .mode = MODE, \ .iop = IOP, \ .fop = FOP, \ .op = OP, \ + .need_perms_check = PERMS, \ } +/* + * XX_PERMS() are files without any ptrace() check. + * However, they have updated uid/gid and permissions on each file operation. + */ #define DIR(NAME, MODE, iops, fops) \ - NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) + NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {}, true) #define LNK(NAME, get_link) \ NOD(NAME, (S_IFLNK|S_IRWXUGO), \ &proc_pid_link_inode_operations, NULL, \ - { .proc_get_link = get_link } ) + { .proc_get_link = get_link }, false) #define REG(NAME, MODE, fops) \ - NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) + NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}, true) +#define REG_PERMS(NAME, MODE, fops) \ + NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}, false) #define INF(NAME, MODE, read) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_info_file_operations, \ - { .proc_read = read } ) + { .proc_read = read }, true ) +#define INF_PERMS(NAME, MODE, read) \ + NOD(NAME, (S_IFREG|(MODE)), \ + NULL, &proc_info_file_operations, \ + { .proc_read = read }, false ) #define ONE(NAME, MODE, show) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ - { .proc_show = show } ) + { .proc_show = show }, true ) + /* * Count the number of hardlinks for the pid_entry table, excluding the . @@ -229,35 +232,12 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task) return ERR_PTR(-EPERM); } -/* - * If current may access user memory in @task return a reference to the - * corresponding mm, otherwise ERR_PTR. 
- */ -static struct mm_struct *check_mem_permission(struct task_struct *task) -{ - struct mm_struct *mm; - int err; - - /* - * Avoid racing if task exec's as we might get a new mm but validate - * against old credentials. - */ - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = __check_mem_permission(task); - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; int err; - err = mutex_lock_killable(&task->signal->cred_guard_mutex); + err = mutex_lock_killable(&task->signal->cred_guard_mutex); if (err) return ERR_PTR(err); @@ -327,7 +307,6 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer) return res; } - #ifdef CONFIG_KALLSYMS /* * Provides a wchan file via kallsyms in a proper one-value-per-file format. @@ -350,23 +329,6 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) } #endif /* CONFIG_KALLSYMS */ -static int lock_trace(struct task_struct *task) -{ - int err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return err; - if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { - mutex_unlock(&task->signal->cred_guard_mutex); - return -EPERM; - } - return 0; -} - -static void unlock_trace(struct task_struct *task) -{ - mutex_unlock(&task->signal->cred_guard_mutex); -} - #ifdef CONFIG_STACKTRACE #define MAX_STACK_TRACE_DEPTH 64 @@ -388,16 +350,13 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, trace.entries = entries; trace.skip = 0; - err = lock_trace(task); - if (!err) { - save_stack_trace_tsk(task, &trace); + save_stack_trace_tsk(task, &trace); - for (i = 0; i < trace.nr_entries; i++) { - seq_printf(m, "[<%pK>] %pS\n", - (void *)entries[i], (void *)entries[i]); - } - unlock_trace(task); + for (i = 0; i < trace.nr_entries; i++) { + seq_printf(m, "[<%pK>] %pS\n", + (void *)entries[i], (void *)entries[i]); } + kfree(entries); return err; @@ 
-563,9 +522,7 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer) { long nr; unsigned long args[6], sp, pc; - int res = lock_trace(task); - if (res) - return res; + int res; if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) res = sprintf(buffer, "running\n"); @@ -577,7 +534,7 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer) nr, args[0], args[1], args[2], args[3], args[4], args[5], sp, pc); - unlock_trace(task); + return res; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ @@ -589,18 +546,18 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer) /* permission checks */ static int proc_fd_access_allowed(struct inode *inode) { - struct task_struct *task; - int allowed = 0; - /* Allow access to a task's file descriptors if it is us or we - * may use ptrace attach to the process and find out that - * information. - */ - task = get_proc_task(inode); - if (task) { - allowed = ptrace_may_access(task, PTRACE_MODE_READ); - put_task_struct(task); - } - return allowed; + struct task_struct *task; + int allowed = 0; + /* Allow access to a task's file descriptors if it is us or we + * may use ptrace attach to the process and find out that + * information. 
+ */ + task = get_proc_task(inode); + if (task) { + allowed = ptrace_may_access(task, PTRACE_MODE_READ); + put_task_struct(task); + } + return allowed; } int proc_setattr(struct dentry *dentry, struct iattr *attr) @@ -839,7 +796,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, if (!page) goto out; - mm = check_mem_permission(task); + mm = __check_mem_permission(task); ret = PTR_ERR(mm); if (IS_ERR(mm)) goto out_free; @@ -902,7 +859,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, if (!page) goto out_task; - mm = check_mem_permission(task); + mm = __check_mem_permission(task); copied = PTR_ERR(mm); if (IS_ERR(mm)) goto out_free; @@ -1758,6 +1715,27 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) /* dentry stuff */ +static void pid_revalidate_perms(struct dentry *dentry); + +void __pid_revalidate(struct inode *inode, struct task_struct *task) +{ + const struct cred *cred; + + if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || + task_dumpable(task)) { + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); + } else { + inode->i_uid = 0; + inode->i_gid = 0; + } + inode->i_mode &= ~(S_ISUID | S_ISGID); + security_task_to_inode(task, inode); +} + /* * Exceptional case: normally we are not allowed to unhash a busy * directory. 
In this case, however, we can do it - no aliasing problems @@ -1777,7 +1755,6 @@ int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode; struct task_struct *task; - const struct cred *cred; if (nd && nd->flags & LOOKUP_RCU) return -ECHILD; @@ -1785,20 +1762,10 @@ int pid_revalidate(struct dentry *dentry, struct nameidata *nd) inode = dentry->d_inode; task = get_proc_task(inode); + pid_revalidate_perms(dentry); + if (task) { - if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || - task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = 0; - inode->i_gid = 0; - } - inode->i_mode &= ~(S_ISUID | S_ISGID); - security_task_to_inode(task, inode); + __pid_revalidate(inode, task); put_task_struct(task); return 1; } @@ -2200,7 +2167,9 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, ei = PROC_I(inode); ei->fd = fd; inode->i_mode = S_IFREG | S_IRUSR; - inode->i_fop = &proc_fdinfo_file_operations; + ei->real_fops = &proc_fdinfo_file_operations; + inode->i_fop = &proc_pid_perms_fops; + d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ @@ -2238,7 +2207,6 @@ static const struct inode_operations proc_fdinfo_inode_operations = { .setattr = proc_setattr, }; - static struct dentry *proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -2255,11 +2223,16 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, inode->i_mode = p->mode; if (S_ISDIR(inode->i_mode)) inode->i_nlink = 2; /* Use getattr to fix if necessary */ + if (p->iop) inode->i_op = p->iop; if (p->fop) - inode->i_fop = p->fop; + ei->real_fops = p->fop; ei->op = p->op; + inode->i_fop = &proc_pid_perms_fops; + + ei->dirent = p; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* 
Close the race of the process dying before we return the dentry */ @@ -2417,15 +2390,9 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, if (copy_from_user(page, buf, count)) goto out_free; - /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); - if (length < 0) - goto out_free; - length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); - mutex_unlock(&task->signal->cred_guard_mutex); out_free: free_page((unsigned long) page); out: @@ -2469,7 +2436,7 @@ static struct dentry *proc_attr_dir_lookup(struct inode *dir, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); } -static const struct inode_operations proc_attr_dir_inode_operations = { +const struct inode_operations proc_attr_dir_inode_operations = { .lookup = proc_attr_dir_lookup, .getattr = pid_getattr, .setattr = proc_setattr, @@ -2617,7 +2584,7 @@ static const struct inode_operations proc_self_inode_operations = { */ static const struct pid_entry proc_base_stuff[] = { NOD("self", S_IFLNK|S_IRWXUGO, - &proc_self_inode_operations, NULL, {}), + &proc_self_inode_operations, NULL, {}, false), }; static struct dentry *proc_base_instantiate(struct inode *dir, @@ -2653,9 +2620,12 @@ static struct dentry *proc_base_instantiate(struct inode *dir, inode->i_size = 64; if (p->iop) inode->i_op = p->iop; + if (p->fop) - inode->i_fop = p->fop; + ei->real_fops = p->fop; ei->op = p->op; + inode->i_fop = &proc_pid_perms_fops; + d_add(dentry, inode); error = NULL; out: @@ -2708,9 +2678,6 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) struct task_io_accounting acct = task->ioac; unsigned long flags; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - return -EACCES; - if (whole && lock_task_sighand(task, &flags)) { struct task_struct *t = task; @@ -2751,12 +2718,8 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) static int 
proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - int err = lock_trace(task); - if (!err) { - seq_printf(m, "%08x\n", task->personality); - unlock_trace(task); - } - return err; + seq_printf(m, "%08x\n", task->personality); + return 0; } /* @@ -2773,8 +2736,8 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif - REG("environ", S_IRUSR, proc_environ_operations), - INF("auxv", S_IRUSR, proc_pid_auxv), + REG_PERMS("environ", S_IRUSR, proc_environ_operations), + INF_PERMS("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUGO, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), @@ -2791,9 +2754,9 @@ static const struct pid_entry tgid_base_stuff[] = { INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), - REG("maps", S_IRUGO, proc_maps_operations), + REG_PERMS("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, proc_numa_maps_operations), + REG_PERMS("numa_maps", S_IRUGO, proc_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), @@ -2804,7 +2767,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("mountstats", S_IRUSR, proc_mountstats_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), - REG("smaps", S_IRUGO, proc_smaps_operations), + REG_PERMS("smaps", S_IRUGO, proc_smaps_operations), REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY @@ -2867,7 +2830,7 @@ static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *de tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } -static const struct inode_operations proc_tgid_base_inode_operations = { +const struct inode_operations 
proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, @@ -2972,9 +2935,10 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, if (!inode) goto out; - inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; - inode->i_fop = &proc_tgid_base_operations; + inode->i_fop = &proc_pid_perms_fops; + PROC_I(inode)->real_fops = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, @@ -3122,8 +3086,8 @@ static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), - REG("environ", S_IRUSR, proc_environ_operations), - INF("auxv", S_IRUSR, proc_pid_auxv), + REG_PERMS("environ", S_IRUSR, proc_environ_operations), + INF_PERMS("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUGO, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), @@ -3137,9 +3101,9 @@ static const struct pid_entry tid_base_stuff[] = { INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), - REG("maps", S_IRUGO, proc_maps_operations), + REG_PERMS("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, proc_numa_maps_operations), + REG_PERMS("numa_maps", S_IRUGO, proc_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), @@ -3149,7 +3113,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("mountinfo", S_IRUGO, proc_mountinfo_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), - REG("smaps", S_IRUGO, 
proc_smaps_operations), + REG_PERMS("smaps", S_IRUGO, proc_smaps_operations), REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY @@ -3209,7 +3173,7 @@ static const struct file_operations proc_tid_base_operations = { .llseek = default_llseek, }; -static const struct inode_operations proc_tid_base_inode_operations = { +const struct inode_operations proc_tid_base_inode_operations = { .lookup = proc_tid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, @@ -3226,7 +3190,8 @@ static struct dentry *proc_task_instantiate(struct inode *dir, goto out; inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tid_base_inode_operations; - inode->i_fop = &proc_tid_base_operations; + inode->i_fop = &proc_pid_perms_fops; + PROC_I(inode)->real_fops = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, @@ -3445,3 +3410,58 @@ static const struct file_operations proc_task_operations = { .readdir = proc_task_readdir, .llseek = default_llseek, }; + +const int perms_size[] = { + [PROC_PERMS_NTID] = ARRAY_SIZE(tid_base_stuff), + [PROC_PERMS_NTGID] = ARRAY_SIZE(tgid_base_stuff), + [PROC_PERMS_NATTR] = ARRAY_SIZE(attr_dir_stuff), +}; + +const struct pid_entry *dir_ents[] = { + [PROC_PERMS_NTID] = tid_base_stuff, + [PROC_PERMS_NTGID] = tgid_base_stuff, + [PROC_PERMS_NATTR] = attr_dir_stuff, +}; + +static void pid_revalidate_perms(struct dentry *dentry) +{ + struct inode *inode; + struct pid_namespace *pid_ns; + struct proc_perms *perms; + const struct inode_operations *parent_op; + struct proc_inode *ei; + + pid_ns = dentry->d_sb->s_fs_info; + perms = &pid_ns->proc_perms; + inode = dentry->d_inode; + parent_op = dentry->d_parent->d_inode->i_op; + ei = PROC_I(inode); + + /* We never ever chmod symlinks (XXX: symlinks must be protected too) */ + if (S_ISLNK(inode->i_mode)) + return; + + if (inode->i_op == &proc_tgid_base_inode_operations || + inode->i_op == 
&proc_tid_base_inode_operations) { + mode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; + mode_t mask = perms->proc_pid_allowed ? ~0 : ~077; + inode->i_mode = mode & mask; + } else if (parent_op == &proc_tgid_base_inode_operations) { + inode->i_mode = get_ent_perms(ei->dirent, + perms, + PROC_PERMS_NTGID, + (const struct pid_entry *)ei->dirent - &tgid_base_stuff[0]); + } else if (parent_op == &proc_tid_base_inode_operations) { + inode->i_mode = get_ent_perms(ei->dirent, + perms, + PROC_PERMS_NTID, + (const struct pid_entry *)ei->dirent - &tid_base_stuff[0]); + } else if (parent_op == &proc_attr_dir_inode_operations) { + inode->i_mode = get_ent_perms(ei->dirent, + perms, + PROC_PERMS_NATTR, + (struct pid_entry *)ei->dirent - &attr_dir_stuff[0]); + } else { + pr_err("other not found (%s)\n", dentry->d_name.name); + } +} diff --git a/fs/proc/base_perms.c b/fs/proc/base_perms.c new file mode 100644 index 0000000..26e157a --- /dev/null +++ b/fs/proc/base_perms.c @@ -0,0 +1,305 @@ +/* + * linux/fs/proc/base_perms.c + * + * Copyright (C) 2011 Vasiliy Kulikov + * + * proc base directory permissions handling functions + */ + +#include <asm/uaccess.h> +#include <linux/proc_fs.h> +#include <linux/string.h> +#include <linux/ptrace.h> +#include <linux/poll.h> +#include <linux/pid_namespace.h> +#include "internal.h" + + +/* Similar to acl_permission_check(), but with procfs specific changes */ +static int proc_permission_check(struct task_struct *task, struct inode *inode, int mask) +{ + mode_t mode = inode->i_mode; + //uid_t uid = inode->i_uid; + gid_t gid = inode->i_gid; + int pmode = (mask == MAY_WRITE) ? PTRACE_MODE_ATTACH : PTRACE_MODE_READ; + + if (mode & mask) + return 0; + + /* XXX: This should be updated conformably to user ns code changes. + * Or maybe just remove it as ns users may not have any access to procfs? 
*/ + if (current_user_ns() != inode_userns(inode)) + return -EACCES; + + if (in_group_p(gid) && ((mode >> 3) & mask)) + return 0; + + /* + * The major changes compared to acl_permission_check(): + * We check ptrace ability instead of uid comparison. + */ + if (ptrace_may_access(task, pmode)) + return 0; + + return -EACCES; +} + +static void unlock_pid_trace(struct task_struct *task); + +/* + * All operations with /proc/PID/ directory MUST be processed + * under lock_pid_trace() to avoid execve() races. + * + * It checks permissions similar to acl_permission_check() and possibly holds + * (*task)->signal->cred_guard_mutex. If so, *task would point to the target + * task, NULL otherwise. To unlock the mutex call unlock_pid_trace() with the + * same *task argument. + */ +static int lock_pid_trace(struct dentry *dentry, + int mask, + struct task_struct **task) +{ + int err; + struct inode *inode = dentry->d_inode; + const struct pid_entry *pe = PROC_I(inode)->dirent; + + if (pe && !pe->need_perms_check) { + /* OK, the file checks permissions on its own */ + *task = NULL; + return 0; + } + + *task = get_proc_task(inode); + if (*task == NULL) + return -ESRCH; + + err = mutex_lock_killable(&(*task)->signal->cred_guard_mutex); + if (err) + goto put_task; + + /* + * We have to revalidate both uid/gid and permissions. + * uid/gid revalidation is racy against execve(), so we do it under + * ->cred_guard_mutex. + * + * We cannot just update uid/gid because of LSM.
+ */ + __pid_revalidate(inode, *task); + + err = -EACCES; + if (proc_permission_check(*task, inode, mask)) + goto free_mutex; + + err = 0; + goto exit; + +free_mutex: + mutex_unlock(&(*task)->signal->cred_guard_mutex); +put_task: + put_task_struct(*task); + *task = NULL; +exit: + return err; +} + +static void unlock_pid_trace(struct task_struct *task) +{ + if (task) { + mutex_unlock(&task->signal->cred_guard_mutex); + put_task_struct(task); + } +} + +static int proc_pid_perms_open(struct inode *inode, struct file *file) +{ + struct proc_inode *pi = PROC_I(inode); + + /* No ptrace check here. All checks should be located + * in read/write/readdir/etc. */ + + if (pi->real_fops->open) + return pi->real_fops->open(inode, file); + return 0; +} + +static int proc_pid_perms_release(struct inode *inode, struct file *file) +{ + struct proc_inode *pi = PROC_I(inode); + + if (pi->real_fops->release) + return pi->real_fops->release(inode, file); + return 0; +} + +static ssize_t proc_pid_perms_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct dentry *dentry = file->f_dentry; + struct proc_inode *pi = PROC_I(dentry->d_inode); + int rc; + struct task_struct *task; + + if (pi->real_fops->read) { + rc = lock_pid_trace(dentry, MAY_READ, &task); + if (!rc) + rc = pi->real_fops->read(file, buf, count, ppos); + unlock_pid_trace(task); + return rc; + } + + /* Should never happen, but better safe than sorry */ + return -EPERM; +} + +static ssize_t proc_pid_perms_write(struct file *file, const char __user *buf, + size_t count, loff_t *offs) +{ + struct dentry *dentry = file->f_dentry; + struct proc_inode *pi = PROC_I(dentry->d_inode); + int rc; + struct task_struct *task; + + if (pi->real_fops->write) { + rc = lock_pid_trace(dentry, MAY_WRITE, &task); + if (!rc) + rc = pi->real_fops->write(file, buf, count, offs); + unlock_pid_trace(task); + return rc; + } + + /* Should never happen, but better safe than sorry */ + return -EPERM; +} + +static int 
proc_pid_perms_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = file->f_dentry; + struct proc_inode *pi = PROC_I(dentry->d_inode); + int rc; + struct task_struct *task; + + if (pi->real_fops->readdir) { + rc = lock_pid_trace(dentry, MAY_READ, &task); + if (!rc) + rc = pi->real_fops->readdir(file, dirent, filldir); + unlock_pid_trace(task); + return rc; + } + + /* Should never happen, but better safe than sorry */ + return -ENOTDIR; +} + +static unsigned proc_pid_perms_poll(struct file *file, poll_table *wait) +{ + struct dentry *dentry = file->f_dentry; + struct proc_inode *pi = PROC_I(dentry->d_inode); + struct task_struct *task; + int rc; + + /* Check perms before the poll. There is a race against execve(), + * but hopefully the infoleak is very minor */ + rc = lock_pid_trace(dentry, MAY_READ, &task); + unlock_pid_trace(task); + if (rc) + return rc; + + if (pi->real_fops->poll) + return pi->real_fops->poll(file, wait); + + /* The fallback for files not implementing poll */ + return DEFAULT_POLLMASK; +} + +static loff_t proc_pid_perms_llseek(struct file *file, loff_t offset, int orig) +{ + struct proc_inode *pi = PROC_I(file->f_dentry->d_inode); + + /* Looks like llseek() doesn't need any ptrace protection */ + + if (pi->real_fops->llseek) + return pi->real_fops->llseek(file, offset, orig); + + return no_llseek(file, offset, orig); +} + +/* + * Global wrapper fops for _ALL_ files inside /proc/PID/. + * + * It checks the permissions (including ptrace() check) of the current + * task on all file ops. The runtime checks protect from holding any fd + * across execve() of set*id binaries. task->signal->cred_guard_mutex + * is held during read(), readdir(), and write(). open(), release(), + * llseek() are not protected as they are harmless anyway. It makes + * an effort to check permissions on poll(), but it doesn't hold the + * mutex during actual fops->poll() as poll() might sleep for a long time.
+ * + * It makes sense to use proc_pid_perms_fops even for files doing their + * own checks (e.g. REG_PERMS()). + */ +const struct file_operations proc_pid_perms_fops = { + .open = proc_pid_perms_open, + .read = proc_pid_perms_read, + .readdir = proc_pid_perms_readdir, + .write = proc_pid_perms_write, + .llseek = proc_pid_perms_llseek, + .poll = proc_pid_perms_poll, + .release = proc_pid_perms_release, +}; + +mode_t get_ent_perms(const struct pid_entry *p, + struct proc_perms *perms, + int ndir, + int nent) +{ + mode_t umask = perms->proc_ent_allowed[ndir][nent] ? 0 : 077; + return p->mode & ~umask; +} + +int init_proc_perms(struct proc_perms *p) +{ + int i; + + p->proc_pid_allowed = true; + for (i = 0; i < PROC_PERMS_NMAX; i++) { + size_t size = perms_size[i]*sizeof(bool); + p->proc_ent_allowed[i] = kzalloc(size, GFP_KERNEL); + if (p->proc_ent_allowed[i] == NULL) + goto err; + } + + return 0; +err: + for (i-- ; i >= 0; i--) + kfree(p->proc_ent_allowed[i]); + return -ENOMEM; +} + +void free_proc_perms(struct proc_perms *p) +{ + int i; + + for (i = 0; i < PROC_PERMS_NMAX; i++) + kfree(p->proc_ent_allowed[i]); +} + +void fill_proc_perms_ent(struct proc_perms *p, int ndir, bool val) +{ + int i; + for (i = 0; i < perms_size[ndir]; i++) + p->proc_ent_allowed[ndir][i] = val; +} + +bool set_proc_perms_ent(struct proc_perms *p, int ndir, const char *fname, bool val) +{ + int i; + for (i = 0; i < perms_size[ndir]; i++) + if (!strcmp(dir_ents[ndir][i].name, fname)) { + pr_err("found %s (%d)\n", fname, i); + p->proc_ent_allowed[ndir][i] = val; + return true; + } + + pr_err("not found %s (%d)\n", fname, i); + return false; +} diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 74b48cf..022ff46 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -18,6 +18,10 @@ #include <linux/module.h> #include <linux/sysctl.h> #include <linux/slab.h> +#include <linux/pid_namespace.h> +#include <linux/seq_file.h> +#include <linux/mount.h> + #include <asm/system.h> #include <asm/uaccess.h> 
@@ -69,6 +73,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->sysctl_entry = NULL; ei->ns = NULL; ei->ns_ops = NULL; + ei->real_fops = NULL; + ei->dirent = NULL; inode = &ei->vfs_inode; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; @@ -102,12 +108,25 @@ void __init proc_init_inodecache(void) init_once); } +static int proc_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct super_block *sb = vfs->mnt_sb; + struct pid_namespace *pid = sb->s_fs_info; + + if (pid->proc_perms.proc_pid_allowed) + seq_printf(seq, ",proc_pid_allowed=."); + + return 0; +} + static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .evict_inode = proc_evict_inode, .statfs = simple_statfs, + .remount_fs = proc_remount, + .show_options = proc_show_options, }; static void __pde_users_dec(struct proc_dir_entry *pde) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7838e5c..fe9ed27 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -137,11 +137,36 @@ int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, const char *name, int len, instantiate_t instantiate, struct task_struct *task, const void *ptr); int pid_revalidate(struct dentry *dentry, struct nameidata *nd); +extern void __pid_revalidate(struct inode *inode, struct task_struct *task); struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); extern const struct dentry_operations pid_dentry_operations; int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); int proc_setattr(struct dentry *dentry, struct iattr *attr); +int proc_remount(struct super_block *sb, int *flags, char *data); extern const struct inode_operations proc_ns_dir_inode_operations; extern const struct file_operations proc_ns_dir_operations; +extern const struct inode_operations proc_tgid_base_inode_operations; +extern 
const struct inode_operations proc_tid_base_inode_operations; +extern const struct inode_operations proc_attr_dir_inode_operations; + +extern const struct file_operations proc_pid_perms_fops; +extern const int perms_size[]; +extern const struct pid_entry *dir_ents[]; + +extern mode_t get_ent_perms(const struct pid_entry *p, + struct proc_perms *perms, + int ndir, + int nent); + +struct pid_entry { + char *name; + int len; + mode_t mode; + const struct inode_operations *iop; + const struct file_operations *fop; + union proc_op op; + bool need_perms_check; +}; + diff --git a/fs/proc/root.c b/fs/proc/root.c index d6c3b41..9193fa1 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,6 +18,7 @@ #include <linux/bitops.h> #include <linux/mount.h> #include <linux/pid_namespace.h> +#include <linux/parser.h> #include "internal.h" @@ -36,6 +37,112 @@ static int proc_set_super(struct super_block *sb, void *data) return err; } +enum { + Opt_err, Opt_tid_allowed, Opt_tgid_allowed, Opt_attr_allowed, +}; + +static const match_table_t tokens = { + {Opt_tgid_allowed, "tgid_allowed=%s"}, + {Opt_tid_allowed, "tid_allowed=%s"}, + {Opt_attr_allowed, "attr_allowed=%s"}, + {Opt_err, NULL}, +}; + +static bool proc_parse_xallowed(struct proc_perms *p, int ndir, char *val, bool set_pid_allowed) +{ + char *field; + + fill_proc_perms_ent(p, ndir, false); + + if (strcmp(val, "none") == 0) { + if (set_pid_allowed) + p->proc_pid_allowed = false; + return true; + } + if (strcmp(val, "all") == 0) { + fill_proc_perms_ent(p, ndir, true); + if (set_pid_allowed) + p->proc_pid_allowed = true; + return true; + } + + while ((field = strsep(&val, ";")) != NULL) { + if (!set_proc_perms_ent(p, ndir, field, true)) + return false; + } + + if (set_pid_allowed) + p->proc_pid_allowed = true; + return true; +} + +static int proc_parse_options(char *options, struct pid_namespace *pid) +{ + char *p, *val; + substring_t args[MAX_OPT_ARGS]; + + pr_debug("proc: options = %s\n", options); + + if (!options) + return 
1; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + args[0].to = args[0].from = 0; + token = match_token(p, tokens, args); + switch (token) { + case Opt_tgid_allowed: + val = match_strdup(&args[0]); + if (!val) + return 0; + if (!proc_parse_xallowed(&pid->proc_perms, PROC_PERMS_NTGID, val, true)) { + kfree(val); + return 0; + } + kfree(val); + break; + case Opt_tid_allowed: + val = match_strdup(&args[0]); + if (!val) + return 0; + if (!proc_parse_xallowed(&pid->proc_perms, PROC_PERMS_NTID, val, false)) { + kfree(val); + return 0; + } + kfree(val); + break; + case Opt_attr_allowed: + val = match_strdup(&args[0]); + if (!val) + return 0; + if (!proc_parse_xallowed(&pid->proc_perms, PROC_PERMS_NATTR, val, false)) { + kfree(val); + return 0; + } + kfree(val); + break; + default: + pr_err("proc: unrecognized mount option \"%s\" " + "or missing value", p); + return 0; + } + } + + //pr_debug("proc: gid = %u, hidepid = %o, hidenet = %d\n", pid->pid_gid, pid->hide_pid, (int)pid->hide_net); + + return 1; +} + +int proc_remount(struct super_block *sb, int *flags, char *data) +{ + struct pid_namespace *pid = sb->s_fs_info; + return !proc_parse_options(data, pid); +} + + static struct dentry *proc_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -43,11 +150,16 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, struct super_block *sb; struct pid_namespace *ns; struct proc_inode *ei; + char *options; - if (flags & MS_KERNMOUNT) + + if (flags & MS_KERNMOUNT) { ns = (struct pid_namespace *)data; - else + options = NULL; + } else { ns = current->nsproxy->pid_ns; + options = data; + } sb = sget(fs_type, proc_test_super, proc_set_super, ns); if (IS_ERR(sb)) @@ -55,6 +167,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, if (!sb->s_root) { sb->s_flags = flags; + if (!proc_parse_options(options, ns)) { + deactivate_locked_super(sb); + return ERR_PTR(-EINVAL); + } 
err = proc_fill_super(sb); if (err) { deactivate_locked_super(sb); @@ -74,6 +190,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, return dget(sb->s_root); } + static void proc_kill_sb(struct super_block *sb) { struct pid_namespace *ns; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 980de54..da98ba2 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -201,7 +201,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_for_maps(priv->task); + mm = get_task_mm(priv->task); if (!mm || IS_ERR(mm)) { put_task_struct(priv->task); priv->task = NULL; diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 38d1032..ed10329 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -16,6 +16,18 @@ struct pidmap { struct bsd_acct_struct; +enum { + PROC_PERMS_NTID, + PROC_PERMS_NTGID, + PROC_PERMS_NATTR, + PROC_PERMS_NMAX +}; + +struct proc_perms { + bool proc_pid_allowed; + bool *proc_ent_allowed[PROC_PERMS_NMAX]; +}; + struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; @@ -30,6 +42,7 @@ struct pid_namespace { #ifdef CONFIG_BSD_PROCESS_ACCT struct bsd_acct_struct *bacct; #endif + struct proc_perms proc_perms; }; extern struct pid_namespace init_pid_ns; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index e7576cf..a1525d7 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -115,10 +115,17 @@ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); struct pid_namespace; +struct proc_perms; extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); +extern int init_proc_perms(struct proc_perms *p); +extern void free_proc_perms(struct proc_perms *p); + +extern void fill_proc_perms_ent(struct proc_perms *p, int ndir, bool 
val); +extern bool set_proc_perms_ent(struct proc_perms *p, int ndir, const char *fname, bool val); + /* * proc_tty.c */ @@ -267,6 +274,11 @@ struct proc_inode { struct pid *pid; int fd; union proc_op op; + + /* Used by files inside /proc/PID/ only */ + const struct file_operations *real_fops; + const void *dirent; + struct proc_dir_entry *pde; struct ctl_table_header *sysctl; struct ctl_table *sysctl_entry; diff --git a/kernel/pid.c b/kernel/pid.c index 57a8346..ffa8b37 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -565,6 +565,8 @@ void __init pidmap_init(void) set_bit(0, init_pid_ns.pidmap[0].page); atomic_dec(&init_pid_ns.pidmap[0].nr_free); + BUG_ON(init_proc_perms(&init_pid_ns.proc_perms)); + init_pid_ns.pid_cachep = KMEM_CACHE(pid, SLAB_HWCACHE_ALIGN | SLAB_PANIC); } diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index e9c9adc..ab4261c 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -97,12 +97,18 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p for (i = 1; i < PIDMAP_ENTRIES; i++) atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); - err = pid_ns_prepare_proc(ns); + err = init_proc_perms(&ns->proc_perms); if (err) goto out_put_parent_pid_ns; + err = pid_ns_prepare_proc(ns); + if (err) + goto out_free_proc_perms; + return ns; +out_free_proc_perms: + free_proc_perms(&ns->proc_perms); out_put_parent_pid_ns: put_pid_ns(parent_pid_ns); out_free_map: @@ -119,6 +125,8 @@ static void destroy_pid_namespace(struct pid_namespace *ns) for (i = 0; i < PIDMAP_ENTRIES; i++) kfree(ns->pidmap[i].page); + + free_proc_perms(&ns->proc_perms); kmem_cache_free(pid_ns_cachep, ns); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f175d98..745b71e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -57,6 +57,7 @@ #include <linux/pipe_fs_i.h> #include <linux/oom.h> #include <linux/kmod.h> +#include <linux/pid_namespace.h> #include <asm/uaccess.h> #include <asm/processor.h> --
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.