--- zzzz-none-000/linux-3.10.107/fs/proc/base.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/proc/base.c 2021-02-04 17:41:59.000000000 +0000 @@ -105,7 +105,7 @@ */ struct pid_entry { - char *name; + const char *name; int len; umode_t mode; const struct inode_operations *iop; @@ -130,10 +130,6 @@ { .proc_get_link = get_link } ) #define REG(NAME, MODE, fops) \ NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) -#define INF(NAME, MODE, read) \ - NOD(NAME, (S_IFREG|(MODE)), \ - NULL, &proc_info_file_operations, \ - { .proc_read = read } ) #define ONE(NAME, MODE, show) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ @@ -173,7 +169,7 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path) { - struct task_struct *task = get_proc_task(dentry->d_inode); + struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; if (task) { @@ -190,7 +186,7 @@ static int proc_root_link(struct dentry *dentry, struct path *path) { - struct task_struct *task = get_proc_task(dentry->d_inode); + struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; if (task) { @@ -200,59 +196,224 @@ return result; } -static int proc_pid_cmdline(struct task_struct *task, char * buffer) +static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, + size_t _count, loff_t *pos) { - int res = 0; - unsigned int len; - struct mm_struct *mm = get_task_mm(task); + struct task_struct *tsk; + struct mm_struct *mm; + char *page; + unsigned long count = _count; + unsigned long arg_start, arg_end, env_start, env_end; + unsigned long len1, len2, len; + unsigned long p; + char c; + ssize_t rv; + + BUG_ON(*pos < 0); + + tsk = get_proc_task(file_inode(file)); + if (!tsk) + return -ESRCH; + mm = get_task_mm(tsk); + put_task_struct(tsk); if (!mm) - goto out; - if (!mm->arg_end) - goto out_mm; /* Shh! No looking before we're done */ + return 0; + /* Check if process spawned far enough to have cmdline. */ + if (!mm->env_end) { + rv = 0; + goto out_mmput; + } - len = mm->arg_end - mm->arg_start; - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - - res = access_process_vm(task, mm->arg_start, buffer, len, 0); - - // If the nul at the end of args has been overwritten, then - // assume application is using setproctitle(3). - if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { - len = strnlen(buffer, res); - if (len < res) { - res = len; + page = (char *)__get_free_page(GFP_TEMPORARY); + if (!page) { + rv = -ENOMEM; + goto out_mmput; + } + + down_read(&mm->mmap_sem); + arg_start = mm->arg_start; + arg_end = mm->arg_end; + env_start = mm->env_start; + env_end = mm->env_end; + up_read(&mm->mmap_sem); + + BUG_ON(arg_start > arg_end); + BUG_ON(env_start > env_end); + + len1 = arg_end - arg_start; + len2 = env_end - env_start; + + /* Empty ARGV. */ + if (len1 == 0) { + rv = 0; + goto out_free_page; + } + /* + * Inherently racy -- command line shares address space + * with code and data. + */ + rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + if (rv <= 0) + goto out_free_page; + + rv = 0; + + if (c == '\0') { + /* Command line (set of strings) occupies whole ARGV. */ + if (len1 <= *pos) + goto out_free_page; + + p = arg_start + *pos; + len = len1 - *pos; + while (count > 0 && len > 0) { + unsigned int _count; + int nr_read; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + } + } else { + /* + * Command line (1 string) occupies ARGV and maybe + * extends into ENVP. + */ + if (len1 + len2 <= *pos) + goto skip_argv_envp; + if (len1 <= *pos) + goto skip_argv; + + p = arg_start + *pos; + len = len1 - *pos; + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* + * Command line can be shorter than whole ARGV + * even if last "marker" byte says it is not. + */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; + } +skip_argv: + /* + * Command line (1 string) occupies ARGV and + * extends into ENVP. + */ + if (len1 <= *pos) { + p = env_start + *pos - len1; + len = len1 + len2 - *pos; } else { - len = mm->env_end - mm->env_start; - if (len > PAGE_SIZE - res) - len = PAGE_SIZE - res; - res += access_process_vm(task, mm->env_start, buffer+res, len, 0); - res = strnlen(buffer, res); + p = env_start; + len = len2; } + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* Find EOS. */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; + } +skip_argv_envp: + ; } -out_mm: + +out_free_page: + free_page((unsigned long)page); +out_mmput: mmput(mm); -out: - return res; + if (rv > 0) + *pos += rv; + return rv; } -static int proc_pid_auxv(struct task_struct *task, char *buffer) +static const struct file_operations proc_pid_cmdline_ops = { + .read = proc_pid_cmdline_read, + .llseek = generic_file_llseek, +}; + +static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); - int res = PTR_ERR(mm); if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; do { nwords += 2; } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ - res = nwords * sizeof(mm->saved_auxv[0]); - if (res > PAGE_SIZE) - res = PAGE_SIZE; - memcpy(buffer, mm->saved_auxv, res); + seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0])); mmput(mm); - } - return res; + return 0; + } else + return PTR_ERR(mm); } @@ -261,20 +422,21 @@ * Provides a wchan file via kallsyms in a proper one-value-per-file format. * Returns the resolved symbol. If that fails, simply return the address. */ -static int proc_pid_wchan(struct task_struct *task, char *buffer) +static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { unsigned long wchan; char symname[KSYM_NAME_LEN]; wchan = get_wchan(task); - if (lookup_symbol_name(wchan, symname) < 0) - if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) - return 0; - else - return sprintf(buffer, "%lu", wchan); + if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) + && !lookup_symbol_name(wchan, symname)) + seq_printf(m, "%s", symname); else - return sprintf(buffer, "%s", symname); + seq_putc(m, '0'); + + return 0; } #endif /* CONFIG_KALLSYMS */ @@ -332,16 +494,22 @@ } #endif -#ifdef CONFIG_SCHEDSTATS +#ifdef CONFIG_SCHED_INFO /* * Provides /proc/PID/schedstat */ -static int proc_pid_schedstat(struct task_struct *task, char *buffer) +static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - return sprintf(buffer, "%llu %llu %lu\n", - (unsigned long long)task->se.sum_exec_runtime, - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); + if (unlikely(!sched_info_on())) + seq_printf(m, "0 0 0\n"); + else + seq_printf(m, "%llu %llu %lu\n", + (unsigned long long)task->se.sum_exec_runtime, + (unsigned long long)task->sched_info.run_delay, + task->sched_info.pcount); + + return 0; } #endif @@ -405,38 +573,8 @@ #endif -#ifdef CONFIG_CGROUPS -static int cgroup_open(struct inode *inode, struct file *file) -{ - struct pid *pid = PROC_I(inode)->pid; - return single_open(file, proc_cgroup_show, pid); -} - -static const struct file_operations proc_cgroup_operations = { - .open = cgroup_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - -#ifdef CONFIG_PROC_PID_CPUSET - -static int cpuset_open(struct inode *inode, struct file *file) -{ - struct pid *pid = PROC_I(inode)->pid; - return single_open(file, proc_cpuset_show, pid); -} - -static const struct file_operations proc_cpuset_operations = { - .open = cpuset_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif - -static int proc_oom_score(struct task_struct *task, char *buffer) +static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { unsigned long totalpages = totalram_pages + total_swap_pages; unsigned long points = 0; @@ -446,12 +584,14 @@ points = oom_badness(task, NULL, NULL, totalpages) * 1000 / totalpages; read_unlock(&tasklist_lock); - return sprintf(buffer, "%lu\n", points); + seq_printf(m, "%lu\n", points); + + return 0; } struct limit_names { - char *name; - char *unit; + const char *name; + const char *unit; }; static const struct limit_names lnames[RLIM_NLIMITS] = { @@ -474,12 +614,11 @@ }; /* Display limits for a process */ -static int proc_pid_limits(struct task_struct *task, char *buffer) +static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { unsigned int i; - int count = 0; unsigned long flags; - char *bufptr = buffer; struct rlimit rlim[RLIM_NLIMITS]; @@ -491,54 +630,56 @@ /* * print the file header */ - count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", - "Limit", "Soft Limit", "Hard Limit", "Units"); + seq_printf(m, "%-25s %-20s %-20s %-10s\n", + "Limit", "Soft Limit", "Hard Limit", "Units"); for (i = 0; i < RLIM_NLIMITS; i++) { if (rlim[i].rlim_cur == RLIM_INFINITY) - count += sprintf(&bufptr[count], "%-25s %-20s ", - lnames[i].name, "unlimited"); + seq_printf(m, "%-25s %-20s ", + lnames[i].name, "unlimited"); else - count += sprintf(&bufptr[count], "%-25s %-20lu ", - lnames[i].name, rlim[i].rlim_cur); + seq_printf(m, "%-25s %-20lu ", + lnames[i].name, rlim[i].rlim_cur); if (rlim[i].rlim_max == RLIM_INFINITY) - count += sprintf(&bufptr[count], "%-20s ", "unlimited"); + seq_printf(m, "%-20s ", "unlimited"); else - count += sprintf(&bufptr[count], "%-20lu ", - rlim[i].rlim_max); + seq_printf(m, "%-20lu ", rlim[i].rlim_max); if (lnames[i].unit) - count += sprintf(&bufptr[count], "%-10s\n", - lnames[i].unit); + seq_printf(m, "%-10s\n", lnames[i].unit); else - count += sprintf(&bufptr[count], "\n"); + seq_putc(m, '\n'); } - return count; + return 0; } #ifdef CONFIG_HAVE_ARCH_TRACEHOOK -static int proc_pid_syscall(struct task_struct *task, char *buffer) +static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { long nr; unsigned long args[6], sp, pc; - int res = lock_trace(task); + int res; + + res = lock_trace(task); if (res) return res; if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) - res = sprintf(buffer, "running\n"); + seq_puts(m, "running\n"); else if (nr < 0) - res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc); else - res = sprintf(buffer, + seq_printf(m, "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", nr, args[0], args[1], args[2], args[3], args[4], args[5], sp, pc); unlock_trace(task); - return res; + + return 0; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ @@ -566,7 +707,7 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) { int error; - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); if (attr->ia_valid & ATTR_MODE) return -EPERM; @@ -630,43 +771,6 @@ .setattr = proc_setattr, }; -#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ - -static ssize_t proc_info_read(struct file * file, char __user * buf, - size_t count, loff_t *ppos) -{ - struct inode * inode = file_inode(file); - unsigned long page; - ssize_t length; - struct task_struct *task = get_proc_task(inode); - - length = -ESRCH; - if (!task) - goto out_no_task; - - if (count > PROC_BLOCK_SIZE) - count = PROC_BLOCK_SIZE; - - length = -ENOMEM; - if (!(page = __get_free_page(GFP_TEMPORARY))) - goto out; - - length = PROC_I(inode)->op.proc_read(task, (char*)page); - - if (length >= 0) - length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); - free_page(page); -out: - put_task_struct(task); -out_no_task: - return length; -} - -static const struct file_operations proc_info_file_operations = { - .read = proc_info_read, - .llseek = generic_file_llseek, -}; - static int proc_single_show(struct seq_file *m, void *v) { struct inode *inode = m->private; @@ -699,29 +803,35 @@ .release = single_release, }; -static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) + +struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { - struct task_struct *task = get_proc_task(file_inode(file)); - struct mm_struct *mm; + struct task_struct *task = get_proc_task(inode); + struct mm_struct *mm = ERR_PTR(-ESRCH); - if (!task) - return -ESRCH; + if (task) { + mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); + put_task_struct(task); - mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); - put_task_struct(task); + if (!IS_ERR_OR_NULL(mm)) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + } + + return mm; +} + +static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) +{ + struct mm_struct *mm = proc_mem_open(inode, mode); if (IS_ERR(mm)) return PTR_ERR(mm); - if (mm) { - /* ensure this mm_struct can't be freed */ - atomic_inc(&mm->mm_count); - /* but do not pin its memory */ - mmput(mm); - } - file->private_data = mm; - return 0; } @@ -924,6 +1034,16 @@ return simple_read_from_buffer(buf, count, ppos, buffer, len); } +/* + * /proc/pid/oom_adj exists solely for backwards compatibility with previous + * kernels. The effective policy is defined by oom_score_adj, which has a + * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. + * Values written to oom_adj are simply mapped linearly to oom_score_adj. + * Processes that become oom disabled via oom_adj will still be oom disabled + * with this implementation. + * + * oom_adj cannot be removed since existing userspace binaries use it. + */ static ssize_t oom_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -1119,10 +1239,9 @@ size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); - char *page, *tmp; - ssize_t length; uid_t loginuid; kuid_t kloginuid; + int rv; rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { @@ -1131,40 +1250,28 @@ } rcu_read_unlock(); - if (count >= PAGE_SIZE) - count = PAGE_SIZE - 1; - if (*ppos != 0) { /* No partial writes. */ return -EINVAL; } - page = (char*)__get_free_page(GFP_TEMPORARY); - if (!page) - return -ENOMEM; - length = -EFAULT; - if (copy_from_user(page, buf, count)) - goto out_free_page; - - page[count] = '\0'; - loginuid = simple_strtoul(page, &tmp, 10); - if (tmp == page) { - length = -EINVAL; - goto out_free_page; + rv = kstrtou32_from_user(buf, count, 10, &loginuid); + if (rv < 0) + return rv; + + /* is userspace tring to explicitly UNSET the loginuid? */ + if (loginuid == AUDIT_UID_UNSET) { + kloginuid = INVALID_UID; + } else { + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) + return -EINVAL; } - kloginuid = make_kuid(file->f_cred->user_ns, loginuid); - if (!uid_valid(kloginuid)) { - length = -EINVAL; - goto out_free_page; - } - - length = audit_set_loginuid(kloginuid); - if (likely(length == 0)) - length = count; -out_free_page: - free_page((unsigned long) page); - return length; + rv = audit_set_loginuid(kloginuid); + if (rv < 0) + return rv; + return count; } static const struct file_operations proc_loginuid_operations = { @@ -1218,8 +1325,9 @@ const char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task; - char buffer[PROC_NUMBUF], *end; + char buffer[PROC_NUMBUF]; int make_it_fail; + int rv; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -1228,9 +1336,12 @@ count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - make_it_fail = simple_strtol(strstrip(buffer), &end, 0); - if (*end) + rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); + if (rv < 0) + return rv; + if (make_it_fail < 0 || make_it_fail > 1) return -EINVAL; + task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; @@ -1434,18 +1545,13 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) { struct task_struct *task; - struct mm_struct *mm; struct file *exe_file; - task = get_proc_task(dentry->d_inode); + task = get_proc_task(d_inode(dentry)); if (!task) return -ENOENT; - mm = get_task_mm(task); + exe_file = get_task_exe_file(task); put_task_struct(task); - if (!mm) - return -ENOENT; - exe_file = get_mm_exe_file(mm); - mmput(mm); if (exe_file) { *exe_path = exe_file->f_path; path_get(&exe_file->f_path); @@ -1455,9 +1561,9 @@ return -ENOENT; } -static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct path path; int error = -EACCES; @@ -1469,7 +1575,7 @@ if (error) goto out; - nd_jump_link(nd, &path); + nd_jump_link(&path); return NULL; out: return ERR_PTR(error); @@ -1502,7 +1608,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) { int error = -EACCES; - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct path path; /* Are we allowed to snoop on the tasks file descriptors? */ @@ -1572,7 +1678,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct task_struct *task; const struct cred *cred; struct pid_namespace *pid = dentry->d_sb->s_fs_info; @@ -1629,7 +1735,7 @@ if (flags & LOOKUP_RCU) return -ECHILD; - inode = dentry->d_inode; + inode = d_inode(dentry); task = get_proc_task(inode); if (task) { @@ -1649,17 +1755,21 @@ put_task_struct(task); return 1; } - d_drop(dentry); return 0; } +static inline bool proc_inode_is_dead(struct inode *inode) +{ + return !proc_pid(inode)->tasks[PIDTYPE_PID].first; +} + int pid_delete_dentry(const struct dentry *dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; + return proc_inode_is_dead(d_inode(dentry)); } const struct dentry_operations pid_dentry_operations = @@ -1682,50 +1792,36 @@ * reported by readdir in sync with the inode numbers reported * by stat. */ -int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, +bool proc_fill_cache(struct file *file, struct dir_context *ctx, const char *name, int len, instantiate_t instantiate, struct task_struct *task, const void *ptr) { - struct dentry *child, *dir = filp->f_path.dentry; + struct dentry *child, *dir = file->f_path.dentry; + struct qstr qname = QSTR_INIT(name, len); struct inode *inode; - struct qstr qname; - ino_t ino = 0; - unsigned type = DT_UNKNOWN; - - qname.name = name; - qname.len = len; - qname.hash = full_name_hash(name, len); + unsigned type; + ino_t ino; - child = d_lookup(dir, &qname); + child = d_hash_and_lookup(dir, &qname); if (!child) { - struct dentry *new; - new = d_alloc(dir, &qname); - if (new) { - child = instantiate(dir->d_inode, new, task, ptr); - if (child) - dput(new); - else - child = new; - } - } - if (!child || IS_ERR(child) || !child->d_inode) - goto end_instantiate; - inode = child->d_inode; - if (inode) { - ino = inode->i_ino; - type = inode->i_mode >> 12; + child = d_alloc(dir, &qname); + if (!child) + goto end_instantiate; + if (instantiate(d_inode(dir), child, task, ptr) < 0) { + dput(child); + goto end_instantiate; + } } + inode = d_inode(child); + ino = inode->i_ino; + type = inode->i_mode >> 12; dput(child); + return dir_emit(ctx, name, len, ino, type); + end_instantiate: - if (!ino) - ino = find_inode_number(dir, &qname); - if (!ino) - ino = 1; - return filldir(dirent, name, len, filp->f_pos, ino, type); + return dir_emit(ctx, name, len, 1, DT_UNKNOWN); } -#ifdef CONFIG_CHECKPOINT_RESTORE - /* * dname_to_vma_addr - maps a dentry name into two unsigned longs * which represent vma start and end addresses. @@ -1752,12 +1848,7 @@ if (flags & LOOKUP_RCU) return -ECHILD; - if (!capable(CAP_SYS_ADMIN)) { - status = -EPERM; - goto out_notask; - } - - inode = dentry->d_inode; + inode = d_inode(dentry); task = get_proc_task(inode); if (!task) goto out_notask; @@ -1793,9 +1884,6 @@ put_task_struct(task); out_notask: - if (status <= 0) - d_drop(dentry); - return status; } @@ -1813,7 +1901,7 @@ int rc; rc = -ENOENT; - task = get_proc_task(dentry->d_inode); + task = get_proc_task(d_inode(dentry)); if (!task) goto out; @@ -1848,7 +1936,30 @@ unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; -static struct dentry * +/* + * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the + * symlinks may be used to bypass permissions on ancestor directories in the + * path to the file in question. + */ +static const char * +proc_map_files_follow_link(struct dentry *dentry, void **cookie) +{ + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + return proc_pid_follow_link(dentry, NULL); +} + +/* + * Identical to proc_pid_link_inode_operations except for follow_link() + */ +static const struct inode_operations proc_map_files_link_inode_operations = { + .readlink = proc_pid_readlink, + .follow_link = proc_map_files_follow_link, + .setattr = proc_setattr, +}; + +static int proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -1858,12 +1969,12 @@ inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) - return ERR_PTR(-ENOENT); + return -ENOENT; ei = PROC_I(inode); ei->op.proc_get_link = proc_map_files_get_link; - inode->i_op = &proc_pid_link_inode_operations; + inode->i_op = &proc_map_files_link_inode_operations; inode->i_size = 64; inode->i_mode = S_IFLNK; @@ -1875,7 +1986,7 @@ d_set_d_op(dentry, &tid_map_files_dentry_operations); d_add(dentry, inode); - return NULL; + return 0; } static struct dentry *proc_map_files_lookup(struct inode *dir, @@ -1884,23 +1995,19 @@ unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; - struct dentry *result; + int result; struct mm_struct *mm; - result = ERR_PTR(-EPERM); - if (!capable(CAP_SYS_ADMIN)) - goto out; - - result = ERR_PTR(-ENOENT); + result = -ENOENT; task = get_proc_task(dir); if (!task) goto out; - result = ERR_PTR(-EACCES); + result = -EACCES; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; - result = ERR_PTR(-ENOENT); + result = -ENOENT; if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) goto out_put_task; @@ -1923,7 +2030,7 @@ out_put_task: put_task_struct(task); out: - return result; + return ERR_PTR(result); } static const struct inode_operations proc_map_files_inode_operations = { @@ -1933,22 +2040,19 @@ }; static int -proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) +proc_map_files_readdir(struct file *file, struct dir_context *ctx) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; struct vm_area_struct *vma; struct task_struct *task; struct mm_struct *mm; - ino_t ino; + unsigned long nr_files, pos, i; + struct flex_array *fa = NULL; + struct map_files_info info; + struct map_files_info *p; int ret; - ret = -EPERM; - if (!capable(CAP_SYS_ADMIN)) - goto out; - ret = -ENOENT; - task = get_proc_task(inode); + task = get_proc_task(file_inode(file)); if (!task) goto out; @@ -1957,91 +2061,73 @@ goto out_put_task; ret = 0; - switch (filp->f_pos) { - case 0: - ino = inode->i_ino; - if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) - goto out_put_task; - filp->f_pos++; - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) - goto out_put_task; - filp->f_pos++; - default: - { - unsigned long nr_files, pos, i; - struct flex_array *fa = NULL; - struct map_files_info info; - struct map_files_info *p; + if (!dir_emit_dots(file, ctx)) + goto out_put_task; - mm = get_task_mm(task); - if (!mm) - goto out_put_task; - down_read(&mm->mmap_sem); + mm = get_task_mm(task); + if (!mm) + goto out_put_task; + down_read(&mm->mmap_sem); - nr_files = 0; + nr_files = 0; - /* - * We need two passes here: - * - * 1) Collect vmas of mapped files with mmap_sem taken - * 2) Release mmap_sem and instantiate entries - * - * otherwise we get lockdep complained, since filldir() - * routine might require mmap_sem taken in might_fault(). - */ + /* + * We need two passes here: + * + * 1) Collect vmas of mapped files with mmap_sem taken + * 2) Release mmap_sem and instantiate entries + * + * otherwise we get lockdep complained, since filldir() + * routine might require mmap_sem taken in might_fault(). + */ - for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { - if (vma->vm_file && ++pos > filp->f_pos) - nr_files++; - } - - if (nr_files) { - fa = flex_array_alloc(sizeof(info), nr_files, - GFP_KERNEL); - if (!fa || flex_array_prealloc(fa, 0, nr_files, - GFP_KERNEL)) { - ret = -ENOMEM; - if (fa) - flex_array_free(fa); - up_read(&mm->mmap_sem); - mmput(mm); - goto out_put_task; - } - for (i = 0, vma = mm->mmap, pos = 2; vma; - vma = vma->vm_next) { - if (!vma->vm_file) - continue; - if (++pos <= filp->f_pos) - continue; - - info.mode = vma->vm_file->f_mode; - info.len = snprintf(info.name, - sizeof(info.name), "%lx-%lx", - vma->vm_start, vma->vm_end); - if (flex_array_put(fa, i++, &info, GFP_KERNEL)) - BUG(); - } - } - up_read(&mm->mmap_sem); + for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { + if (vma->vm_file && ++pos > ctx->pos) + nr_files++; + } - for (i = 0; i < nr_files; i++) { - p = flex_array_get(fa, i); - ret = proc_fill_cache(filp, dirent, filldir, - p->name, p->len, - proc_map_files_instantiate, - task, - (void *)(unsigned long)p->mode); - if (ret) - break; - filp->f_pos++; + if (nr_files) { + fa = flex_array_alloc(sizeof(info), nr_files, + GFP_KERNEL); + if (!fa || flex_array_prealloc(fa, 0, nr_files, + GFP_KERNEL)) { + ret = -ENOMEM; + if (fa) + flex_array_free(fa); + up_read(&mm->mmap_sem); + mmput(mm); + goto out_put_task; + } + for (i = 0, vma = mm->mmap, pos = 2; vma; + vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (++pos <= ctx->pos) + continue; + + info.mode = vma->vm_file->f_mode; + info.len = snprintf(info.name, + sizeof(info.name), "%lx-%lx", + vma->vm_start, vma->vm_end); + if (flex_array_put(fa, i++, &info, GFP_KERNEL)) + BUG(); } - if (fa) - flex_array_free(fa); - mmput(mm); } + up_read(&mm->mmap_sem); + + for (i = 0; i < nr_files; i++) { + p = flex_array_get(fa, i); + if (!proc_fill_cache(file, ctx, + p->name, p->len, + proc_map_files_instantiate, + task, + (void *)(unsigned long)p->mode)) + break; + ctx->pos++; } + if (fa) + flex_array_free(fa); + mmput(mm); out_put_task: put_task_struct(task); @@ -2051,7 +2137,7 @@ static const struct file_operations proc_map_files_operations = { .read = generic_read_dir, - .readdir = proc_map_files_readdir, + .iterate = proc_map_files_readdir, .llseek = default_llseek, }; @@ -2104,7 +2190,7 @@ struct k_itimer *timer; struct timers_private *tp = m->private; int notify; - static char *nstr[] = { + static const char * const nstr[] = { [SIGEV_SIGNAL] = "signal", [SIGEV_NONE] = "none", [SIGEV_THREAD] = "thread", @@ -2114,12 +2200,13 @@ notify = timer->it_sigev_notify; seq_printf(m, "ID: %d\n", timer->it_id); - seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo, - timer->sigq->info.si_value.sival_ptr); + seq_printf(m, "signal: %d/%p\n", + timer->sigq->info.si_signo, + timer->sigq->info.si_value.sival_ptr); seq_printf(m, "notify: %s/%s.%d\n", - nstr[notify & ~SIGEV_THREAD_ID], - (notify & SIGEV_THREAD_ID) ? "tid" : "pid", - pid_nr_ns(timer->it_pid, tp->ns)); + nstr[notify & ~SIGEV_THREAD_ID], + (notify & SIGEV_THREAD_ID) ? "tid" : "pid", + pid_nr_ns(timer->it_pid, tp->ns)); seq_printf(m, "ClockID: %d\n", timer->it_clock); return 0; @@ -2152,15 +2239,13 @@ .llseek = seq_lseek, .release = seq_release_private, }; -#endif /* CONFIG_CHECKPOINT_RESTORE */ -static struct dentry *proc_pident_instantiate(struct inode *dir, +static int proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) @@ -2179,9 +2264,9 @@ d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } static struct dentry *proc_pident_lookup(struct inode *dir, @@ -2189,11 +2274,11 @@ const struct pid_entry *ents, unsigned int nents) { - struct dentry *error; + int error; struct task_struct *task = get_proc_task(dir); const struct pid_entry *p, *last; - error = ERR_PTR(-ENOENT); + error = -ENOENT; if (!task) goto out_no_task; @@ -2216,70 +2301,33 @@ out: put_task_struct(task); out_no_task: - return error; -} - -static int proc_pident_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct task_struct *task, const struct pid_entry *p) -{ - return proc_fill_cache(filp, dirent, filldir, p->name, p->len, - proc_pident_instantiate, task, p); + return ERR_PTR(error); } -static int proc_pident_readdir(struct file *filp, - void *dirent, filldir_t filldir, +static int proc_pident_readdir(struct file *file, struct dir_context *ctx, const struct pid_entry *ents, unsigned int nents) { - int i; - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); - const struct pid_entry *p, *last; - ino_t ino; - int ret; + struct task_struct *task = get_proc_task(file_inode(file)); + const struct pid_entry *p; - ret = -ENOENT; if (!task) - goto out_no_task; + return -ENOENT; - ret = 0; - i = filp->f_pos; - switch (i) { - case 0: - ino = inode->i_ino; - if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) - goto out; - i++; - filp->f_pos++; - /* fall through */ - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) - goto out; - i++; - filp->f_pos++; - /* fall through */ - default: - i -= 2; - if (i >= nents) { - ret = 1; - goto out; - } - p = ents + i; - last = &ents[nents - 1]; - while (p <= last) { - if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) - goto out; - filp->f_pos++; - p++; - } - } + if (!dir_emit_dots(file, ctx)) + goto out; - ret = 1; + if (ctx->pos >= nents + 2) + goto out; + + for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { + if (!proc_fill_cache(file, ctx, p->name, p->len, + proc_pident_instantiate, task, p)) + break; + ctx->pos++; + } out: put_task_struct(task); -out_no_task: - return ret; + return 0; } #ifdef CONFIG_SECURITY @@ -2364,16 +2412,15 @@ REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), }; -static int proc_attr_dir_readdir(struct file * filp, - void * dirent, filldir_t filldir) +static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) { - return proc_pident_readdir(filp,dirent,filldir, - attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); + return proc_pident_readdir(file, ctx, + attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); } static const struct file_operations proc_attr_dir_operations = { .read = generic_read_dir, - .readdir = proc_attr_dir_readdir, + .iterate = proc_attr_dir_readdir, .llseek = default_llseek, }; @@ -2427,35 +2474,24 @@ { struct task_struct *task; struct mm_struct *mm; - char buffer[PROC_NUMBUF], *end; unsigned int val; int ret; int i; unsigned long mask; - ret = -EFAULT; - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - goto out_no_task; - - ret = -EINVAL; - val = (unsigned int)simple_strtoul(buffer, &end, 0); - if (*end == '\n') - end++; - if (end - buffer == 0) - goto out_no_task; + ret = kstrtouint_from_user(buf, count, 0, &val); + if (ret < 0) + return ret; ret = -ESRCH; task = get_proc_task(file_inode(file)); if (!task) goto out_no_task; - ret = end - buffer; mm = get_task_mm(task); if (!mm) goto out_no_mm; + ret = 0; for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { if (val & mask) @@ -2468,7 +2504,9 @@ out_no_mm: put_task_struct(task); out_no_task: - return ret; + if (ret < 0) + return ret; + return count; } static const struct file_operations proc_coredump_filter_operations = { @@ -2479,7 +2517,7 @@ #endif #ifdef CONFIG_TASK_IO_ACCOUNTING -static int do_io_accounting(struct task_struct *task, char *buffer, int whole) +static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) { struct task_io_accounting acct = task->ioac; unsigned long flags; @@ -2503,40 +2541,44 @@ unlock_task_sighand(task, &flags); } - result = sprintf(buffer, - "rchar: %llu\n" - "wchar: %llu\n" - "syscr: %llu\n" - "syscw: %llu\n" - "read_bytes: %llu\n" - "write_bytes: %llu\n" - "cancelled_write_bytes: %llu\n", - (unsigned long long)acct.rchar, - (unsigned long long)acct.wchar, - (unsigned long long)acct.syscr, - (unsigned long long)acct.syscw, - (unsigned long long)acct.read_bytes, - (unsigned long long)acct.write_bytes, - (unsigned long long)acct.cancelled_write_bytes); + seq_printf(m, + "rchar: %llu\n" + "wchar: %llu\n" + "syscr: %llu\n" + "syscw: %llu\n" + "read_bytes: %llu\n" + "write_bytes: %llu\n" + "cancelled_write_bytes: %llu\n", + (unsigned long long)acct.rchar, + (unsigned long long)acct.wchar, + (unsigned long long)acct.syscr, + (unsigned long long)acct.syscw, + (unsigned long long)acct.read_bytes, + (unsigned long long)acct.write_bytes, + (unsigned long long)acct.cancelled_write_bytes); + result = 0; + out_unlock: mutex_unlock(&task->signal->cred_guard_mutex); return result; } -static int proc_tid_io_accounting(struct task_struct *task, char *buffer) +static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - return do_io_accounting(task, buffer, 0); + return do_io_accounting(task, m, 0); } -static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) +static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - return do_io_accounting(task, buffer, 1); + return do_io_accounting(task, m, 1); } #endif /* CONFIG_TASK_IO_ACCOUNTING */ #ifdef CONFIG_USER_NS static int proc_id_map_open(struct inode *inode, struct file *file, - struct seq_operations *seq_ops) + const struct seq_operations *seq_ops) { struct user_namespace *ns = NULL; struct task_struct *task; @@ -2686,19 +2728,17 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), -#ifdef CONFIG_CHECKPOINT_RESTORE DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), -#endif DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), - INF("auxv", S_IRUSR, proc_pid_auxv), + ONE("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), - ONE("personality", S_IRUGO, proc_pid_personality), - INF("limits", S_IRUGO, proc_pid_limits), + ONE("personality", S_IRUSR, proc_pid_personality), + ONE("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif @@ -2707,9 +2747,9 @@ #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUGO, proc_pid_syscall), + ONE("syscall", S_IRUSR, proc_pid_syscall), #endif - INF("cmdline", S_IRUGO, proc_pid_cmdline), + REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), @@ -2726,30 +2766,30 @@ #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), - REG("pagemap", S_IRUGO, proc_pagemap_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS - INF("wchan", S_IRUGO, proc_pid_wchan), + ONE("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE - ONE("stack", S_IRUGO, proc_pid_stack), + ONE("stack", S_IRUSR, proc_pid_stack), #endif -#ifdef CONFIG_SCHEDSTATS - INF("schedstat", S_IRUGO, proc_pid_schedstat), +#ifdef CONFIG_SCHED_INFO + ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, proc_cpuset_operations), + ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, proc_cgroup_operations), + ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif - INF("oom_score", S_IRUGO, proc_oom_score), + ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL @@ -2763,10 +2803,10 @@ REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUSR, proc_tgid_io_accounting), + ONE("io", S_IRUSR, proc_tgid_io_accounting), #endif #ifdef CONFIG_HARDWALL - INF("hardwall", S_IRUGO, proc_pid_hardwall), + ONE("hardwall", S_IRUGO, proc_pid_hardwall), #endif #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), @@ -2779,16 +2819,15 @@ #endif }; -static int proc_tgid_base_readdir(struct file * filp, - void * dirent, filldir_t filldir) +static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) { - return proc_pident_readdir(filp,dirent,filldir, - tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); + return proc_pident_readdir(file, ctx, + tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } static const struct file_operations proc_tgid_base_operations = { .read = generic_read_dir, - .readdir = proc_tgid_base_readdir, + .iterate = proc_tgid_base_readdir, .llseek = default_llseek, }; @@ -2816,11 +2855,13 @@ /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); } + if (pid == tgid) + return; + name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); @@ -2837,8 +2878,7 @@ name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); } @@ -2890,11 +2930,10 @@ } } -static struct dentry *proc_pid_instantiate(struct inode *dir, - struct dentry * dentry, - struct task_struct *task, const void *ptr) +static int proc_pid_instantiate(struct inode *dir, + struct dentry * dentry, + struct task_struct *task, const void *ptr) { - struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; inode = proc_pid_make_inode(dir->i_sb, task); @@ -2914,19 +2953,19 @@ d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result = NULL; + int result = -ENOENT; struct task_struct *task; unsigned tgid; struct pid_namespace *ns; - tgid = name_to_int(dentry); + tgid = name_to_int(&dentry->d_name); if (tgid == ~0U) goto out; @@ -2942,7 +2981,7 @@ result = proc_pid_instantiate(dir, dentry, task, NULL); put_task_struct(task); out: - return result; + return ERR_PTR(result); } /* @@ -2988,60 +3027,49 @@ return iter; } -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) - -static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct tgid_iter iter) -{ - char name[PROC_NUMBUF]; - int len = snprintf(name, sizeof(name), "%d", iter.tgid); - return proc_fill_cache(filp, dirent, filldir, name, len, - proc_pid_instantiate, iter.task, NULL); -} - -static int fake_filldir(void *buf, const char *name, int namelen, - loff_t offset, u64 ino, unsigned d_type) -{ - return 0; -} +#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) /* for the /proc/ directory itself, after non-process stuff has been done */ -int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) +int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; - struct pid_namespace *ns; - filldir_t __filldir; - loff_t pos = filp->f_pos; + struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; + loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) - goto out; + return 0; + if (pos == TGID_OFFSET - 2) { + struct inode *inode = d_inode(ns->proc_self); + if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) + return 0; + ctx->pos = pos = pos + 1; + } if (pos == TGID_OFFSET - 1) { - if (proc_fill_cache(filp, dirent, filldir, "self", 4, - NULL, NULL, NULL) < 0) - goto out; - iter.tgid = 0; - } else { - iter.tgid = pos - TGID_OFFSET; + struct inode *inode = d_inode(ns->proc_thread_self); + if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) + return 0; + ctx->pos = pos = pos + 1; } + iter.tgid = pos - TGID_OFFSET; iter.task = NULL; - ns = filp->f_dentry->d_sb->s_fs_info; for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { - if (has_pid_permissions(ns, iter.task, 2)) - __filldir = filldir; - else - __filldir = fake_filldir; + char name[PROC_NUMBUF]; + int len; + if (!has_pid_permissions(ns, iter.task, 2)) + continue; - filp->f_pos = iter.tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { + len = snprintf(name, sizeof(name), "%d", iter.tgid); + ctx->pos = iter.tgid + TGID_OFFSET; + if (!proc_fill_cache(file, ctx, name, len, + proc_pid_instantiate, iter.task, NULL)) { put_task_struct(iter.task); - goto out; + return 0; } } - filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; -out: + ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; return 0; } @@ -3052,23 +3080,26 @@ DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), +#ifdef CONFIG_NET + DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), +#endif REG("environ", S_IRUSR, proc_environ_operations), - INF("auxv", S_IRUSR, proc_pid_auxv), + ONE("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), - ONE("personality", S_IRUGO, proc_pid_personality), - INF("limits", S_IRUGO, proc_pid_limits), + ONE("personality", S_IRUSR, proc_pid_personality), + ONE("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUGO, proc_pid_syscall), + ONE("syscall", S_IRUSR, proc_pid_syscall), #endif - INF("cmdline", S_IRUGO, proc_pid_cmdline), + REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_tid_maps_operations), -#ifdef CONFIG_CHECKPOINT_RESTORE +#ifdef CONFIG_PROC_CHILDREN REG("children", S_IRUGO, proc_tid_children_operations), #endif #ifdef CONFIG_NUMA @@ -3083,30 +3114,30 @@ #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_tid_smaps_operations), - REG("pagemap", S_IRUGO, proc_pagemap_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS - INF("wchan", S_IRUGO, proc_pid_wchan), + ONE("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE - ONE("stack", S_IRUGO, proc_pid_stack), + ONE("stack", S_IRUSR, proc_pid_stack), #endif -#ifdef CONFIG_SCHEDSTATS - INF("schedstat", S_IRUGO, proc_pid_schedstat), +#ifdef CONFIG_SCHED_INFO + ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, proc_cpuset_operations), + ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, proc_cgroup_operations), + ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif - INF("oom_score", S_IRUGO, proc_oom_score), + ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL @@ -3117,10 +3148,10 @@ REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUSR, proc_tid_io_accounting), + ONE("io", S_IRUSR, proc_tid_io_accounting), #endif #ifdef CONFIG_HARDWALL - INF("hardwall", S_IRUGO, proc_pid_hardwall), + ONE("hardwall", S_IRUGO, proc_pid_hardwall), #endif #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), @@ -3130,11 +3161,10 @@ #endif }; -static int proc_tid_base_readdir(struct file * filp, - void * dirent, filldir_t filldir) +static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) { - return proc_pident_readdir(filp,dirent,filldir, - tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); + return proc_pident_readdir(file, ctx, + tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -3145,7 +3175,7 @@ static const struct file_operations proc_tid_base_operations = { .read = generic_read_dir, - .readdir = proc_tid_base_readdir, + .iterate = proc_tid_base_readdir, .llseek = default_llseek, }; @@ -3155,10 +3185,9 @@ .setattr = proc_setattr, }; -static struct dentry *proc_task_instantiate(struct inode *dir, +static int proc_task_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; inode = proc_pid_make_inode(dir->i_sb, task); @@ -3177,14 +3206,14 @@ d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, 0)) - error = NULL; + return 0; out: - return error; + return -ENOENT; } static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result = ERR_PTR(-ENOENT); + int result = -ENOENT; struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; @@ -3193,7 +3222,7 @@ if (!leader) goto out_no_task; - tid = name_to_int(dentry); + tid = name_to_int(&dentry->d_name); if (tid == ~0U) goto out; @@ -3214,7 +3243,7 @@ out: put_task_struct(leader); out_no_task: - return result; + return ERR_PTR(result); } /* @@ -3229,34 +3258,42 @@ * In the case of a seek we start with the leader and walk nr * threads past it. */ -static struct task_struct *first_tid(struct task_struct *leader, - int tid, int nr, struct pid_namespace *ns) +static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, + struct pid_namespace *ns) { - struct task_struct *pos; + struct task_struct *pos, *task; + unsigned long nr = f_pos; + + if (nr != f_pos) /* 32bit overflow? */ + return NULL; rcu_read_lock(); - /* Attempt to start with the pid of a thread */ - if (tid && (nr > 0)) { + task = pid_task(pid, PIDTYPE_PID); + if (!task) + goto fail; + + /* Attempt to start with the tid of a thread */ + if (tid && nr) { pos = find_task_by_pid_ns(tid, ns); - if (pos && (pos->group_leader == leader)) + if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ - pos = NULL; - if (nr && nr >= get_nr_threads(leader)) - goto out; + if (nr >= get_nr_threads(task)) + goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ - for (pos = leader; nr > 0; --nr) { - pos = next_thread(pos); - if (pos == leader) { - pos = NULL; - goto out; - } - } + pos = task = task->group_leader; + do { + if (!nr--) + goto found; + } while_each_thread(task, pos); +fail: + pos = NULL; + goto out; found: get_task_struct(pos); out: @@ -3286,83 +3323,49 @@ return pos; } -static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, int tid) -{ - char name[PROC_NUMBUF]; - int len = snprintf(name, sizeof(name), "%d", tid); - return proc_fill_cache(filp, dirent, filldir, name, len, - proc_task_instantiate, task, NULL); -} - /* for the /proc/TGID/task/ directories */ -static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) +static int proc_task_readdir(struct file *file, struct dir_context *ctx) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - struct task_struct *leader = NULL; + struct inode *inode = file_inode(file); struct task_struct *task; - int retval = -ENOENT; - ino_t ino; - int tid; struct pid_namespace *ns; + int tid; - task = get_proc_task(inode); - if (!task) - goto out_no_task; - rcu_read_lock(); - if (pid_alive(task)) { - leader = task->group_leader; - get_task_struct(leader); - } - rcu_read_unlock(); - put_task_struct(task); - if (!leader) - goto out_no_task; - retval = 0; + if (proc_inode_is_dead(inode)) + return -ENOENT; - switch ((unsigned long)filp->f_pos) { - case 0: - ino = inode->i_ino; - if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - /* fall through */ - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - /* fall through */ - } + if (!dir_emit_dots(file, ctx)) + return 0; /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = filp->f_dentry->d_sb->s_fs_info; - tid = (int)filp->f_version; - filp->f_version = 0; - for (task = first_tid(leader, tid, filp->f_pos - 2, ns); + ns = inode->i_sb->s_fs_info; + tid = (int)file->f_version; + file->f_version = 0; + for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; - task = next_tid(task), filp->f_pos++) { + task = next_tid(task), ctx->pos++) { + char name[PROC_NUMBUF]; + int len; tid = task_pid_nr_ns(task, ns); - if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { + len = snprintf(name, sizeof(name), "%d", tid); + if (!proc_fill_cache(file, ctx, name, len, + proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first * pid for the next readir call */ - filp->f_version = (u64)tid; + file->f_version = (u64)tid; put_task_struct(task); break; } } -out: - put_task_struct(leader); -out_no_task: - return retval; + + return 0; } static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(inode, stat); @@ -3383,6 +3386,6 @@ static const struct file_operations proc_task_operations = { .read = generic_read_dir, - .readdir = proc_task_readdir, + .iterate = proc_task_readdir, .llseek = default_llseek, };