--- zzzz-none-000/linux-3.10.107/net/socket.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/socket.c 2021-02-04 17:41:59.000000000 +0000 @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -104,12 +105,16 @@ #include #include #include +#include +#include -static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); +#ifdef CONFIG_NET_RX_BUSY_POLL +unsigned int sysctl_net_busy_read __read_mostly; +unsigned int sysctl_net_busy_poll __read_mostly; +#endif + +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to); +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); @@ -135,15 +140,14 @@ static const struct file_operations socket_file_ops = { .owner = THIS_MODULE, .llseek = no_llseek, - .aio_read = sock_aio_read, - .aio_write = sock_aio_write, + .read_iter = sock_read_iter, + .write_iter = sock_write_iter, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_sock_ioctl, #endif .mmap = sock_mmap, - .open = sock_no_open, /* special open code to disallow open via /proc */ .release = sock_close, .fasync = sock_fasync, .sendpage = sock_sendpage, @@ -253,6 +257,7 @@ } init_waitqueue_head(&wq->wait); wq->fasync_list = NULL; + wq->flags = 0; RCU_INIT_POINTER(ei->socket.wq, wq); ei->socket.state = SS_UNCONNECTED; @@ -308,7 +313,7 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) { return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", - dentry->d_inode->i_ino); + d_inode(dentry)->i_ino); } static const struct dentry_operations sockfs_dentry_operations = { @@ -366,13 +371,12 @@ path.mnt = mntget(sock_mnt); d_instantiate(path.dentry, SOCK_INODE(sock)); - SOCK_INODE(sock)->i_fop = &socket_file_ops; file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); - if (unlikely(IS_ERR(file))) { + if (IS_ERR(file)) { /* drop dentry, keep inode */ - ihold(path.dentry->d_inode); + ihold(d_inode(path.dentry)); path_put(&path); return file; } @@ -444,16 +448,17 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { - struct file *file; + struct fd f = fdget(fd); struct socket *sock; *err = -EBADF; - file = fget_light(fd, fput_needed); - if (file) { - sock = sock_from_file(file, err); - if (sock) + if (f.file) { + sock = sock_from_file(f.file, err); + if (likely(sock)) { + *fput_needed = f.flags; return sock; - fput_light(file, *fput_needed); + } + fdput(f); } return NULL; } @@ -493,7 +498,7 @@ ssize_t len; ssize_t used = 0; - len = security_inode_listsecurity(dentry->d_inode, buffer, size); + len = security_inode_listsecurity(d_inode(dentry), buffer, size); if (len < 0) return len; used += len; @@ -550,23 +555,6 @@ return sock; } -/* - * In theory you can't get an open on this inode, but /proc provides - * a back door. Remember to keep it shut otherwise you'll let the - * creepy crawlies in. - */ - -static int sock_no_open(struct inode *irrelevant, struct file *dontcare) -{ - return -ENXIO; -} - -const struct file_operations bad_sock_fops = { - .owner = THIS_MODULE, - .open = sock_no_open, - .llseek = noop_llseek, -}; - /** * sock_release - close a socket * @sock: socket to close @@ -587,10 +575,7 @@ } if (rcu_dereference_protected(sock->wq, 1)->fasync_list) - printk(KERN_ERR "sock_release: fasync list not empty!\n"); - - if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags)) - return; + pr_err("%s: fasync list not empty!\n", __func__); this_cpu_sub(sockets_in_use, 1); if (!sock->file) { @@ -601,84 +586,47 @@ } EXPORT_SYMBOL(sock_release); -void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) { - *tx_flags = 0; - if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) - *tx_flags |= SKBTX_HW_TSTAMP; - if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) - *tx_flags |= SKBTX_SW_TSTAMP; - if (sock_flag(sk, SOCK_WIFI_STATUS)) - *tx_flags |= SKBTX_WIFI_STATUS; -} -EXPORT_SYMBOL(sock_tx_timestamp); + u8 flags = *tx_flags; -static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) -{ - struct sock_iocb *si = kiocb_to_siocb(iocb); + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE) + flags |= SKBTX_HW_TSTAMP; - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) + flags |= SKBTX_SW_TSTAMP; - return sock->ops->sendmsg(iocb, sock, msg, size); -} + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED) + flags |= SKBTX_SCHED_TSTAMP; -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) -{ - int err = security_socket_sendmsg(sock, msg, size); + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) + flags |= SKBTX_ACK_TSTAMP; - return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); + *tx_flags = flags; } +EXPORT_SYMBOL(__sock_tx_timestamp); -int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { - struct kiocb iocb; - struct sock_iocb siocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - ret = __sock_sendmsg(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); + BUG_ON(ret == -EIOCBQUEUED); return ret; } -EXPORT_SYMBOL(sock_sendmsg); -static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) +int sock_sendmsg(struct socket *sock, struct msghdr *msg) { - struct kiocb iocb; - struct sock_iocb siocb; - int ret; + int err = security_socket_sendmsg(sock, msg, + msg_data_left(msg)); - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; + return err ?: sock_sendmsg_nosec(sock, msg); } +EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { - mm_segment_t oldfs = get_fs(); - int result; - - set_fs(KERNEL_DS); - /* - * the following is safe, since for compiler definitions of kvec and - * iovec are identical, yielding the same in-core layout and alignment - */ - msg->msg_iov = (struct iovec *)vec; - msg->msg_iovlen = num; - result = sock_sendmsg(sock, msg, size); - set_fs(oldfs); - return result; + iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); + return sock_sendmsg(sock, msg); } EXPORT_SYMBOL(kernel_sendmsg); @@ -689,7 +637,7 @@ struct sk_buff *skb) { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); - struct timespec ts[3]; + struct scm_timestamping tss; int empty = 1; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@ -706,28 +654,24 @@ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); } else { - skb_get_timestampns(skb, &ts[0]); + struct timespec ts; + skb_get_timestampns(skb, &ts); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts[0]), &ts[0]); + sizeof(ts), &ts); } } - - memset(ts, 0, sizeof(ts)); - if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, ts + 0)) + memset(&tss, 0, sizeof(tss)); + if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && + ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) + empty = 0; + if (shhwtstamps && + (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && + ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) empty = 0; - if (shhwtstamps) { - if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1)) - empty = 0; - if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) - empty = 0; - } if (!empty) put_cmsg(msg, SOL_SOCKET, - SCM_TIMESTAMPING, sizeof(ts), &ts); + SCM_TIMESTAMPING, sizeof(tss), &tss); } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); @@ -750,9 +694,9 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) + if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount) put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, - sizeof(__u32), &skb->dropcount); + sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); } void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, @@ -763,59 +707,21 @@ } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); -static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - si->flags = flags; - - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + return sock->ops->recvmsg(sock, msg, size, flags); } -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, + int flags) { int err = security_socket_recvmsg(sock, msg, size, flags); - return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); -} - -int sock_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - struct sock_iocb siocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; + return err ?: sock_recvmsg_nosec(sock, msg, size, flags); } EXPORT_SYMBOL(sock_recvmsg); -static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct kiocb iocb; - struct sock_iocb siocb; - int ret; - - init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; - ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); - return ret; -} - /** * kernel_recvmsg - Receive a message from a socket (kernel space) * @sock: The socket to receive the message from @@ -837,23 +743,14 @@ mm_segment_t oldfs = get_fs(); int result; + iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size); set_fs(KERNEL_DS); - /* - * the following is safe, since for compiler definitions of kvec and - * iovec are identical, yielding the same in-core layout and alignment - */ - msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; result = sock_recvmsg(sock, msg, size, flags); set_fs(oldfs); return result; } EXPORT_SYMBOL(kernel_recvmsg); -static void sock_aio_dtor(struct kiocb *iocb) -{ - kfree(iocb->private); -} - static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more) { @@ -881,98 +778,48 @@ return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - struct sock_iocb *siocb) -{ - if (!is_sync_kiocb(iocb)) { - siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); - if (!siocb) - return NULL; - iocb->ki_dtor = sock_aio_dtor; - } - - siocb->kiocb = iocb; - iocb->private = siocb; - return siocb; -} - -static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) +static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; + struct msghdr msg = {.msg_iter = *to, + .msg_iocb = iocb}; + ssize_t res; - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *)iov; - msg->msg_iovlen = nr_segs; - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; - return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); -} - -static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct sock_iocb siocb, *x; - - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; - if (iocb->ki_left == 0) /* Match SYS5 behaviour */ + if (!iov_iter_count(to)) /* Match SYS5 behaviour */ return 0; - - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags); + *to = msg.msg_iter; + return res; } -static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, const struct iovec *iov, - unsigned long nr_segs) +static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct socket *sock = file->private_data; - size_t size = 0; - int i; - - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; - - msg->msg_name = NULL; - msg->msg_namelen = 0; - msg->msg_control = NULL; - msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *)iov; - msg->msg_iovlen = nr_segs; - msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; - if (sock->type == SOCK_SEQPACKET) - msg->msg_flags |= MSG_EOR; - - return __sock_sendmsg(iocb, sock, msg, size); -} - -static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct sock_iocb siocb, *x; + struct msghdr msg = {.msg_iter = *from, + .msg_iocb = iocb}; + ssize_t res; - if (pos != 0) + if (iocb->ki_pos != 0) return -ESPIPE; - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; + if (file->f_flags & O_NONBLOCK) + msg.msg_flags = MSG_DONTWAIT; + + if (sock->type == SOCK_SEQPACKET) + msg.msg_flags |= MSG_EOR; - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + res = sock_sendmsg(sock, &msg); + *from = msg.msg_iter; + return res; } /* @@ -1043,6 +890,9 @@ void __user *argp = (void __user *)arg; int pid, err; struct net *net; +#ifdef CONFIG_AVM_SK_TC_INDEX + unsigned long tc_index; +#endif sock = file->private_data; sk = sock->sk; @@ -1061,7 +911,8 @@ err = -EFAULT; if (get_user(pid, (int __user *)argp)) break; - err = f_setown(sock->file, pid, 1); + f_setown(sock->file, pid, 1); + err = 0; break; case FIOGETOWN: case SIOCGPGRP: @@ -1103,6 +954,21 @@ err = dlci_ioctl_hook(cmd, argp); mutex_unlock(&dlci_ioctl_mutex); break; + +#ifdef CONFIG_AVM_SK_TC_INDEX + case SIOCSET_TC_INDEX: + err = -EFAULT; + if (get_user(tc_index, (unsigned long __user *)argp)) + break; + sock->sk->sk_tc_index = tc_index; + err = 0; + break; + + case SIOCGET_TC_INDEX: + err = put_user(sock->sk->sk_tc_index, (unsigned long __user *)argp); + break; +#endif + default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@ -1143,13 +1009,24 @@ /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) { + unsigned int busy_flag = 0; struct socket *sock; /* * We can't return errors to poll, so it's either yes or no. */ sock = file->private_data; - return sock->ops->poll(file, sock, wait); + + if (sk_can_busy_loop(sock->sk)) { + /* this socket can poll_ll so tell the system call */ + busy_flag = POLL_BUSY_LOOP; + + /* once, only if requested by syscall */ + if (wait && (wait->_key & POLL_BUSY_LOOP)) + sk_busy_loop(sock->sk, 1); + } + + return busy_flag | sock->ops->poll(file, sock, wait); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) @@ -1198,27 +1075,20 @@ return 0; } -/* This function may be called only under socket lock or callback_lock or rcu_lock */ +/* This function may be called only under rcu_lock */ -int sock_wake_async(struct socket *sock, int how, int band) +int sock_wake_async(struct socket_wq *wq, int how, int band) { - struct socket_wq *wq; - - if (!sock) - return -1; - rcu_read_lock(); - wq = rcu_dereference(sock->wq); - if (!wq || !wq->fasync_list) { - rcu_read_unlock(); + if (!wq || !wq->fasync_list) return -1; - } + switch (how) { case SOCK_WAKE_WAITD: - if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) + if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags)) break; goto call_kill; case SOCK_WAKE_SPACE: - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) break; /* fall through */ case SOCK_WAKE_IO: @@ -1228,7 +1098,7 @@ case SOCK_WAKE_URG: kill_fasync(&wq->fasync_list, SIGURG, band); } - rcu_read_unlock(); + return 0; } EXPORT_SYMBOL(sock_wake_async); @@ -1257,8 +1127,8 @@ static int warned; if (!warned) { warned = 1; - printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", - current->comm); + pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n", + current->comm); } family = PF_PACKET; } @@ -1352,9 +1222,9 @@ } EXPORT_SYMBOL(sock_create); -int sock_create_kern(int family, int type, int protocol, struct socket **res) +int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { - return __sock_create(&init_net, family, type, protocol, res, 1); + return __sock_create(net, family, type, protocol, res, 1); } EXPORT_SYMBOL(sock_create_kern); @@ -1437,48 +1307,61 @@ err = fd1; goto out_release_both; } + fd2 = get_unused_fd_flags(flags); if (unlikely(fd2 < 0)) { err = fd2; - put_unused_fd(fd1); - goto out_release_both; + goto out_put_unused_1; } newfile1 = sock_alloc_file(sock1, flags, NULL); - if (unlikely(IS_ERR(newfile1))) { + if (IS_ERR(newfile1)) { err = PTR_ERR(newfile1); - put_unused_fd(fd1); - put_unused_fd(fd2); - goto out_release_both; + goto out_put_unused_both; } newfile2 = sock_alloc_file(sock2, flags, NULL); if (IS_ERR(newfile2)) { err = PTR_ERR(newfile2); - fput(newfile1); - put_unused_fd(fd1); - put_unused_fd(fd2); - sock_release(sock2); - goto out; + goto out_fput_1; } + err = put_user(fd1, &usockvec[0]); + if (err) + goto out_fput_both; + + err = put_user(fd2, &usockvec[1]); + if (err) + goto out_fput_both; + audit_fd_pair(fd1, fd2); + fd_install(fd1, newfile1); fd_install(fd2, newfile2); /* fd1 and fd2 may be already another descriptors. * Not kernel problem. */ - err = put_user(fd1, &usockvec[0]); - if (!err) - err = put_user(fd2, &usockvec[1]); - if (!err) - return 0; + return 0; - sys_close(fd2); - sys_close(fd1); - return err; +out_fput_both: + fput(newfile2); + fput(newfile1); + put_unused_fd(fd2); + put_unused_fd(fd1); + goto out; + +out_fput_1: + fput(newfile1); + put_unused_fd(fd2); + put_unused_fd(fd1); + sock_release(sock2); + goto out; +out_put_unused_both: + put_unused_fd(fd2); +out_put_unused_1: + put_unused_fd(fd1); out_release_both: sock_release(sock2); out_release_1: @@ -1596,7 +1479,7 @@ goto out_put; } newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); - if (unlikely(IS_ERR(newfile))) { + if (IS_ERR(newfile)) { err = PTR_ERR(newfile); put_unused_fd(newfd); sock_release(newsock); @@ -1762,17 +1645,14 @@ struct iovec iov; int fput_needed; - if (len > INT_MAX) - len = INT_MAX; + err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter); + if (unlikely(err)) + return err; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - iov.iov_base = buff; - iov.iov_len = len; msg.msg_name = NULL; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_namelen = 0; @@ -1786,7 +1666,7 @@ if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; - err = sock_sendmsg(sock, &msg, len); + err = sock_sendmsg(sock, &msg); out_put: fput_light(sock->file, fput_needed); @@ -1821,25 +1701,23 @@ int err, err2; int fput_needed; - if (size > INT_MAX) - size = INT_MAX; + err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter); + if (unlikely(err)) + return err; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_iovlen = 1; - msg.msg_iov = &iov; - iov.iov_len = size; - iov.iov_base = ubuf; /* Save some cycles and don't copy the address if not needed */ msg.msg_name = addr ? (struct sockaddr *)&address : NULL; /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; + msg.msg_iocb = NULL; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg, size, flags); + err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags); if (err >= 0 && addr != NULL) { err2 = move_addr_to_user(&address, @@ -1857,8 +1735,8 @@ * Receive a datagram from a socket. */ -asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, - unsigned int flags) +SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, + unsigned int, flags) { return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } @@ -1960,20 +1838,59 @@ }; static int copy_msghdr_from_user(struct msghdr *kmsg, - struct msghdr __user *umsg) + struct user_msghdr __user *umsg, + struct sockaddr __user **save_addr, + struct iovec **iov) { - if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) + struct sockaddr __user *uaddr; + struct iovec __user *uiov; + size_t nr_segs; + ssize_t err; + + if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) || + __get_user(uaddr, &umsg->msg_name) || + __get_user(kmsg->msg_namelen, &umsg->msg_namelen) || + __get_user(uiov, &umsg->msg_iov) || + __get_user(nr_segs, &umsg->msg_iovlen) || + __get_user(kmsg->msg_control, &umsg->msg_control) || + __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || + __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + if (!uaddr) + kmsg->msg_namelen = 0; + if (kmsg->msg_namelen < 0) return -EINVAL; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); - return 0; + + if (save_addr) + *save_addr = uaddr; + + if (uaddr && kmsg->msg_namelen) { + if (!save_addr) { + err = move_addr_to_kernel(uaddr, kmsg->msg_namelen, + kmsg->msg_name); + if (err < 0) + return err; + } + } else { + kmsg->msg_name = NULL; + kmsg->msg_namelen = 0; + } + + if (nr_segs > UIO_MAXIOV) + return -EMSGSIZE; + + kmsg->msg_iocb = NULL; + + return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs, + UIO_FASTIOV, iov, &kmsg->msg_iter); } -static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, +static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address) { @@ -1985,35 +1902,17 @@ __attribute__ ((aligned(sizeof(__kernel_size_t)))); /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; - int err, ctl_len, total_len; + int ctl_len; + ssize_t err; + + msg_sys->msg_name = &address; - err = -EFAULT; if (MSG_CMSG_COMPAT & flags) - err = get_compat_msghdr(msg_sys, msg_compat); + err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov); else - err = copy_msghdr_from_user(msg_sys, msg); - if (err) - return err; - - if (msg_sys->msg_iovlen > UIO_FASTIOV) { - err = -EMSGSIZE; - if (msg_sys->msg_iovlen > UIO_MAXIOV) - goto out; - err = -ENOMEM; - iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec), - GFP_KERNEL); - if (!iov) - goto out; - } - - /* This will also move the address data into kernel space */ - if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ); - } else - err = verify_iovec(msg_sys, iov, &address, VERIFY_READ); + err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov); if (err < 0) - goto out_freeiov; - total_len = err; + return err; err = -ENOBUFS; @@ -2060,10 +1959,10 @@ used_address->name_len == msg_sys->msg_namelen && !memcmp(&used_address->name, msg_sys->msg_name, used_address->name_len)) { - err = sock_sendmsg_nosec(sock, msg_sys, total_len); + err = sock_sendmsg_nosec(sock, msg_sys); goto out_freectl; } - err = sock_sendmsg(sock, msg_sys, total_len); + err = sock_sendmsg(sock, msg_sys); /* * If this is sendmmsg() and sending to current destination address was * successful, remember it. @@ -2079,9 +1978,7 @@ if (ctl_buf != ctl) sock_kfree_s(sock->sk, ctl_buf, ctl_len); out_freeiov: - if (iov != iovstack) - kfree(iov); -out: + kfree(iov); return err; } @@ -2089,7 +1986,7 @@ * BSD sendmsg interface */ -long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) +long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags) { int fput_needed, err; struct msghdr msg_sys; @@ -2106,7 +2003,7 @@ return err; } -SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags) +SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags) { if (flags & MSG_CMSG_COMPAT) return -EINVAL; @@ -2143,7 +2040,7 @@ while (datagrams < vlen) { if (MSG_CMSG_COMPAT & flags) { - err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry, + err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry, &msg_sys, flags, &used_address); if (err < 0) break; @@ -2151,7 +2048,7 @@ ++compat_entry; } else { err = ___sys_sendmsg(sock, - (struct msghdr __user *)entry, + (struct user_msghdr __user *)entry, &msg_sys, flags, &used_address); if (err < 0) break; @@ -2162,6 +2059,8 @@ if (err) break; ++datagrams; + if (msg_data_left(&msg_sys)) + break; } fput_light(sock->file, fput_needed); @@ -2181,7 +2080,7 @@ return __sys_sendmmsg(fd, mmsg, vlen, flags); } -static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, +static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, int nosec) { struct compat_msghdr __user *msg_compat = @@ -2189,45 +2088,25 @@ struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; unsigned long cmsg_ptr; - int err, total_len, len; + int total_len, len; + ssize_t err; /* kernel mode address */ struct sockaddr_storage addr; /* user mode address pointers */ struct sockaddr __user *uaddr; - int __user *uaddr_len; - - if (MSG_CMSG_COMPAT & flags) - err = get_compat_msghdr(msg_sys, msg_compat); - else - err = copy_msghdr_from_user(msg_sys, msg); - if (err) - return err; + int __user *uaddr_len = COMPAT_NAMELEN(msg); - if (msg_sys->msg_iovlen > UIO_FASTIOV) { - err = -EMSGSIZE; - if (msg_sys->msg_iovlen > UIO_MAXIOV) - goto out; - err = -ENOMEM; - iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec), - GFP_KERNEL); - if (!iov) - goto out; - } + msg_sys->msg_name = &addr; - /* Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) - */ - uaddr = (__force void __user *)msg_sys->msg_name; - uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) - err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); + err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov); else - err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); + err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov); if (err < 0) - goto out_freeiov; - total_len = err; + return err; + total_len = iov_iter_count(&msg_sys->msg_iter); cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); @@ -2265,9 +2144,7 @@ err = len; out_freeiov: - if (iov != iovstack) - kfree(iov); -out: + kfree(iov); return err; } @@ -2275,7 +2152,7 @@ * BSD recvmsg interface */ -long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags) +long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags) { int fput_needed, err; struct msghdr msg_sys; @@ -2292,7 +2169,7 @@ return err; } -SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, +SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags) { if (flags & MSG_CMSG_COMPAT) @@ -2339,7 +2216,7 @@ * No need to ask LSM for more than the first datagram. */ if (MSG_CMSG_COMPAT & flags) { - err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry, + err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry, &msg_sys, flags & ~MSG_WAITFORONE, datagrams); if (err < 0) @@ -2348,7 +2225,7 @@ ++compat_entry; } else { err = ___sys_recvmsg(sock, - (struct msghdr __user *)entry, + (struct user_msghdr __user *)entry, &msg_sys, flags & ~MSG_WAITFORONE, datagrams); if (err < 0) @@ -2537,13 +2414,13 @@ (int __user *)a[4]); break; case SYS_SENDMSG: - err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); + err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]); break; case SYS_SENDMMSG: err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); break; case SYS_RECVMSG: - err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); + err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]); break; case SYS_RECVMMSG: err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], @@ -2568,7 +2445,7 @@ * * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the - * socket interface. The value ops->family coresponds to the + * socket interface. The value ops->family corresponds to the * socket system call protocol family. */ int sock_register(const struct net_proto_family *ops) @@ -2576,8 +2453,7 @@ int err; if (ops->family >= NPROTO) { - printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, - NPROTO); + pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); return -ENOBUFS; } @@ -2591,7 +2467,7 @@ } spin_unlock(&net_family_lock); - printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); + pr_info("NET: Registered protocol family %d\n", ops->family); return err; } EXPORT_SYMBOL(sock_register); @@ -2619,7 +2495,7 @@ synchronize_rcu(); - printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); + pr_info("NET: Unregistered protocol family %d\n", family); } EXPORT_SYMBOL(sock_unregister); @@ -2657,12 +2533,12 @@ */ #ifdef CONFIG_NETFILTER - netfilter_init(); + err = netfilter_init(); + if (err) + goto out; #endif -#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING - skb_timestamping_init(); -#endif + ptp_classifier_init(); out: return err; @@ -2960,11 +2836,8 @@ struct compat_ifreq __user *ifr32) { struct ifreq kifr; - struct ifreq __user *uifr; mm_segment_t old_fs; int err; - u32 data; - void __user *datap; switch (cmd) { case SIOCBONDENSLAVE: @@ -2981,26 +2854,13 @@ set_fs(old_fs); return err; - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) - return -EFAULT; - - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) - return -EFAULT; - - datap = compat_ptr(data); - if (put_user(datap, &uifr->ifr_ifru.ifru_data)) - return -EFAULT; - - return dev_ioctl(net, cmd, uifr); default: return -ENOIOCTLCMD; } } -static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, +/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ +static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { struct ifreq __user *u_ifreq64; @@ -3011,19 +2871,16 @@ if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), IFNAMSIZ)) return -EFAULT; - if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) return -EFAULT; data64 = compat_ptr(data32); u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); - /* Don't check these user accesses, just let that get trapped - * in the ioctl handler instead. - */ if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ)) return -EFAULT; - if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) + if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) return -EFAULT; return dev_ioctl(net, cmd, u_ifreq64); @@ -3075,12 +2932,12 @@ uifmap32 = &uifr32->ifr_ifru.ifru_map; err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); - err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= __get_user(ifr.ifr_map.port, &uifmap32->port); + err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); + err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); + err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); + err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); + err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); + err |= get_user(ifr.ifr_map.port, &uifmap32->port); if (err) return -EFAULT; @@ -3091,39 +2948,18 @@ if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); - err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= __put_user(ifr.ifr_map.port, &uifmap32->port); + err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); + err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); + err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); + err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); + err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); + err |= put_user(ifr.ifr_map.port, &uifmap32->port); if (err) err = -EFAULT; } return err; } -static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) -{ - void __user *uptr; - compat_uptr_t uptr32; - struct ifreq __user *uifr; - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - if (get_user(uptr32, &uifr32->ifr_data)) - return -EFAULT; - - uptr = compat_ptr(uptr32); - - if (put_user(uptr, &uifr->ifr_data)) - return -EFAULT; - - return dev_ioctl(net, SIOCSHWTSTAMP, uifr); -} - struct rtentry32 { u32 rt_pad1; struct sockaddr rt_dst; /* target address */ @@ -3170,25 +3006,25 @@ struct in6_rtmsg32 __user *ur6 = argp; ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3 * sizeof(struct in6_addr)); - ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); r = (void *) &r6; } else { /* ipv4 */ struct rtentry32 __user *ur4 = argp; ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3 * sizeof(struct sockaddr)); - ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); - ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); - ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); - ret |= __get_user(r4.rt_window, &(ur4->rt_window)); - ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); - ret |= __get_user(rtdev, &(ur4->rt_dev)); + ret |= get_user(r4.rt_flags, &(ur4->rt_flags)); + ret |= get_user(r4.rt_metric, &(ur4->rt_metric)); + ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu)); + ret |= get_user(r4.rt_window, &(ur4->rt_window)); + ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt)); + ret |= get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { ret |= copy_from_user(devname, compat_ptr(rtdev), 15); r4.rt_dev = (char __user __force *)devname; @@ -3235,7 +3071,7 @@ struct net *net = sock_net(sk); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) - return siocdevprivate_ioctl(net, cmd, argp); + return compat_ifr_data_ioctl(net, cmd, argp); switch (cmd) { case SIOCSIFBR: @@ -3255,8 +3091,6 @@ case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: case SIOCBONDCHANGEACTIVE: return bond_ioctl(net, cmd, argp); case SIOCADDRT: @@ -3266,8 +3100,11 @@ return do_siocgstamp(net, sock, cmd, argp); case SIOCGSTAMPNS: return do_siocgstampns(net, sock, cmd, argp); + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: - return compat_siocshwtstamp(net, argp); + case SIOCGHWTSTAMP: + return compat_ifr_data_ioctl(net, cmd, argp); case FIOSETOWN: case SIOCSPGRP: