--- zzzz-none-000/linux-3.10.107/ipc/sem.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/ipc/sem.c 2021-02-04 17:41:59.000000000 +0000 @@ -47,8 +47,7 @@ * Thus: Perfect SMP scaling between independent semaphore arrays. * If multiple semaphores in one array are used, then cache line * trashing on the semaphore array spinlock will limit the scaling. - * - semncnt and semzcnt are calculated on demand in count_semncnt() and - * count_semzcnt() + * - semncnt and semzcnt are calculated on demand in count_semcnt() * - the task that performs a successful semop() scans the list of all * sleeping tasks and completes any pending operations that can be fulfilled. * Semaphores are actively given to waiting tasks (necessary for FIFO). @@ -87,7 +86,7 @@ #include #include -#include +#include #include "util.h" /* One semaphore structure for each semaphore in the system. */ @@ -110,6 +109,7 @@ int pid; /* process id of requesting process */ int status; /* completion status of operation */ struct sembuf *sops; /* array of pending operations */ + struct sembuf *blocking; /* the operation that blocked */ int nsops; /* number of operations */ int alter; /* does *sops alter the array? */ }; @@ -155,14 +155,21 @@ /* * Locking: + * a) global sem_lock() for read/write * sem_undo.id_next, * sem_array.complex_count, - * sem_array.pending{_alter,_cont}, - * sem_array.sem_undo: global sem_lock() for read/write - * sem_undo.proc_next: only "current" is allowed to read/write that field. - * + * sem_array.complex_mode + * sem_array.pending{_alter,_const}, + * sem_array.sem_undo + * + * b) global or semaphore sem_lock() for read/write: * sem_array.sem_base[i].pending_{const,alter}: - * global or semaphore sem_lock() for read/write + * sem_array.complex_mode (for read) + * + * c) special: + * sem_undo_list.list_proc: + * * undo_list->lock for write + * * rcu for read */ #define sc_semmsl sem_ctls[0] @@ -188,9 +195,11 @@ } #endif -void __init sem_init (void) +void __init sem_init(void) { sem_init_ns(&init_ipc_ns); + if (IS_ENABLED(CONFIG_PROC_STRIPPED)) + return; ipc_init_proc_interface("sysvipc/sem", " key semid perms nsems uid gid cuid cgid otime ctime\n", IPC_SEM_IDS, sysvipc_sem_proc_show); @@ -225,7 +234,7 @@ } /** - * merge_queues - Merge single semop queues into global queue + * merge_queues - merge single semop queues into global queue * @sma: semaphore array * * This function merges all per-semaphore queues into the global queue. @@ -263,16 +272,25 @@ #define ipc_smp_acquire__after_spin_is_unlocked() smp_rmb() /* - * Wait until all currently ongoing simple ops have completed. + * Enter the mode suitable for non-simple operations: * Caller must own sem_perm.lock. - * New simple ops cannot start, because simple ops first check - * that sem_perm.lock is free. */ -static void sem_wait_array(struct sem_array *sma) +static void complexmode_enter(struct sem_array *sma) { int i; struct sem *sem; + if (sma->complex_mode) { + /* We are already in complex_mode. Nothing to do */ + return; + } + + /* We need a full barrier after seting complex_mode: + * The write to complex_mode must be visible + * before we read the first sem->lock spinlock state. + */ + smp_store_mb(sma->complex_mode, true); + for (i = 0; i < sma->sem_nsems; i++) { sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); @@ -281,6 +299,28 @@ } /* + * Try to leave the mode that disallows simple operations: + * Caller must own sem_perm.lock. + */ +static void complexmode_tryleave(struct sem_array *sma) +{ + if (sma->complex_count) { + /* Complex ops are sleeping. + * We must stay in complex mode + */ + return; + } + /* + * Immediately after setting complex_mode to false, + * a simple op can start. Thus: all memory writes + * performed by the current operation must be visible + * before we set complex_mode to false. + */ + smp_store_release(&sma->complex_mode, false); +} + +#define SEM_GLOBAL_LOCK (-1) +/* * If the request contains only one semaphore operation, and there are * no complex transactions pending, lock only the semaphore involved. * Otherwise, lock the entire semaphore array, since we either have @@ -296,55 +336,42 @@ /* Complex operation - acquire a full lock */ ipc_lock_object(&sma->sem_perm); - /* And wait until all simple ops that are processed - * right now have dropped their locks. - */ - sem_wait_array(sma); - return -1; + /* Prevent parallel simple ops */ + complexmode_enter(sma); + return SEM_GLOBAL_LOCK; } /* * Only one semaphore affected - try to optimize locking. - * The rules are: - * - optimized locking is possible if no complex operation - * is either enqueued or processed right now. - * - The test for enqueued complex ops is simple: - * sma->complex_count != 0 - * - Testing for complex ops that are processed right now is - * a bit more difficult. Complex ops acquire the full lock - * and first wait that the running simple ops have completed. - * (see above) - * Thus: If we own a simple lock and the global lock is free - * and complex_count is now 0, then it will stay 0 and - * thus just locking sem->lock is sufficient. + * Optimized locking is possible if no complex operation + * is either enqueued or processed right now. + * + * Both facts are tracked by complex_mode. */ sem = sma->sem_base + sops->sem_num; - if (sma->complex_count == 0) { + /* + * Initial check for complex_mode. Just an optimization, + * no locking, no memory barrier. + */ + if (!sma->complex_mode) { /* * It appears that no complex operation is around. * Acquire the per-semaphore lock. */ spin_lock(&sem->lock); - /* Then check that the global lock is free */ - if (!spin_is_locked(&sma->sem_perm.lock)) { - /* - * We need a memory barrier with acquire semantics, - * otherwise we can race with another thread that does: - * complex_count++; - * spin_unlock(sem_perm.lock); - */ - ipc_smp_acquire__after_spin_is_unlocked(); + /* + * See 51d7d5205d33 + * ("powerpc: Add smp_mb() to arch_spin_is_locked()"): + * A full barrier is required: the write of sem->lock + * must be visible before the read is executed + */ + smp_mb(); - /* Now repeat the test of complex_count: - * It can't change anymore until we drop sem->lock. - * Thus: if is now 0, then it will stay 0. - */ - if (sma->complex_count == 0) { - /* fast path successful! */ - return sops->sem_num; - } + if (!smp_load_acquire(&sma->complex_mode)) { + /* fast path successful! */ + return sops->sem_num; } spin_unlock(&sem->lock); } @@ -364,15 +391,16 @@ /* Not a false alarm, thus complete the sequence for a * full lock. */ - sem_wait_array(sma); - return -1; + complexmode_enter(sma); + return SEM_GLOBAL_LOCK; } } static inline void sem_unlock(struct sem_array *sma, int locknum) { - if (locknum == -1) { + if (locknum == SEM_GLOBAL_LOCK) { unmerge_queues(sma); + complexmode_tryleave(sma); ipc_unlock_object(&sma->sem_perm); } else { struct sem *sem = sma->sem_base + locknum; @@ -392,7 +420,7 @@ struct kern_ipc_perm *ipcp; struct sem_array *sma; - ipcp = ipc_obtain_object(&sem_ids(ns), id); + ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); if (IS_ERR(ipcp)) return ERR_CAST(ipcp); @@ -402,7 +430,7 @@ /* ipc_rmid() may have already freed the ID while sem_lock * was spinning: verify that the structure is still valid */ - if (!ipcp->deleted) + if (ipc_valid_object(ipcp)) return container_of(ipcp, struct sem_array, sem_perm); sem_unlock(sma, *locknum); @@ -411,7 +439,7 @@ static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) { - struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id); + struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); if (IS_ERR(ipcp)) return ERR_CAST(ipcp); @@ -433,7 +461,7 @@ static inline void sem_lock_and_putref(struct sem_array *sma) { sem_lock(sma, NULL, -1); - ipc_rcu_putref(sma, ipc_rcu_free); + ipc_rcu_putref(sma, sem_rcu_free); } static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) @@ -453,11 +481,11 @@ * * call wake_up_process * * set queue.status to the final value. * - the previously blocked thread checks queue.status: - * * if it's IN_WAKEUP, then it must wait until the value changes - * * if it's not -EINTR, then the operation was completed by - * update_queue. semtimedop can return queue.status without - * performing any operation on the sem array. - * * otherwise it must acquire the spinlock and check what's up. + * * if it's IN_WAKEUP, then it must wait until the value changes + * * if it's not -EINTR, then the operation was completed by + * update_queue. semtimedop can return queue.status without + * performing any operation on the sem array. + * * otherwise it must acquire the spinlock and check what's up. * * The two-stage algorithm is necessary to protect against the following * races: @@ -482,7 +510,6 @@ * * Called with sem_ids.rwsem held (as a writer) */ - static int newary(struct ipc_namespace *ns, struct ipc_params *params) { int id; @@ -499,12 +526,12 @@ if (ns->used_sems + nsems > ns->sc_semmns) return -ENOSPC; - size = sizeof (*sma) + nsems * sizeof (struct sem); + size = sizeof(*sma) + nsems * sizeof(struct sem); sma = ipc_rcu_alloc(size); - if (!sma) { + if (!sma) return -ENOMEM; - } - memset (sma, 0, size); + + memset(sma, 0, size); sma->sem_perm.mode = (semflg & S_IRWXUGO); sma->sem_perm.key = key; @@ -516,13 +543,6 @@ return retval; } - id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); - if (id < 0) { - ipc_rcu_putref(sma, sem_rcu_free); - return id; - } - ns->used_sems += nsems; - sma->sem_base = (struct sem *) &sma[1]; for (i = 0; i < nsems; i++) { @@ -532,11 +552,20 @@ } sma->complex_count = 0; + sma->complex_mode = true; /* dropped by sem_unlock below */ INIT_LIST_HEAD(&sma->pending_alter); INIT_LIST_HEAD(&sma->pending_const); INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); + + id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); + if (id < 0) { + ipc_rcu_putref(sma, sem_rcu_free); + return id; + } + ns->used_sems += nsems; + sem_unlock(sma, -1); rcu_read_unlock(); @@ -573,7 +602,11 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) { struct ipc_namespace *ns; - struct ipc_ops sem_ops; + static const struct ipc_ops sem_ops = { + .getnew = newary, + .associate = sem_security, + .more_checks = sem_more_checks, + }; struct ipc_params sem_params; ns = current->nsproxy->ipc_ns; @@ -581,10 +614,6 @@ if (nsems < 0 || nsems > ns->sc_semmsl) return -EINVAL; - sem_ops.getnew = newary; - sem_ops.associate = sem_security; - sem_ops.more_checks = sem_more_checks; - sem_params.key = key; sem_params.flg = semflg; sem_params.u.nsems = nsems; @@ -592,30 +621,32 @@ return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } -/** perform_atomic_semop - Perform (if possible) a semaphore operation +/** + * perform_atomic_semop - Perform (if possible) a semaphore operation * @sma: semaphore array - * @sops: array with operations that should be checked - * @nsems: number of sops - * @un: undo array - * @pid: pid that did the change + * @q: struct sem_queue that describes the operation * * Returns 0 if the operation was possible. * Returns 1 if the operation is impossible, the caller must sleep. * Negative values are error codes. */ - -static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, - int nsops, struct sem_undo *un, int pid) +static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) { - int result, sem_op; + int result, sem_op, nsops, pid; struct sembuf *sop; - struct sem * curr; + struct sem *curr; + struct sembuf *sops; + struct sem_undo *un; + + sops = q->sops; + nsops = q->nsops; + un = q->undo; for (sop = sops; sop < sops + nsops; sop++) { curr = sma->sem_base + sop->sem_num; sem_op = sop->sem_op; result = curr->semval; - + if (!sem_op && result) goto would_block; @@ -624,25 +655,25 @@ goto would_block; if (result > SEMVMX) goto out_of_range; + if (sop->sem_flg & SEM_UNDO) { int undo = un->semadj[sop->sem_num] - sem_op; - /* - * Exceeding the undo range is an error. - */ + /* Exceeding the undo range is an error. */ if (undo < (-SEMAEM - 1) || undo > SEMAEM) goto out_of_range; + un->semadj[sop->sem_num] = undo; } + curr->semval = result; } sop--; + pid = q->pid; while (sop >= sops) { sma->sem_base[sop->sem_num].sempid = pid; - if (sop->sem_flg & SEM_UNDO) - un->semadj[sop->sem_num] -= sop->sem_op; sop--; } - + return 0; out_of_range: @@ -650,6 +681,8 @@ goto undo; would_block: + q->blocking = sop; + if (sop->sem_flg & IPC_NOWAIT) result = -EAGAIN; else @@ -658,7 +691,10 @@ undo: sop--; while (sop >= sops) { - sma->sem_base[sop->sem_num].semval -= sop->sem_op; + sem_op = sop->sem_op; + sma->sem_base[sop->sem_num].semval -= sem_op; + if (sop->sem_flg & SEM_UNDO) + un->semadj[sop->sem_num] += sem_op; sop--; } @@ -688,7 +724,7 @@ } /** - * wake_up_sem_queue_do(pt) - do the actual wake-up + * wake_up_sem_queue_do - do the actual wake-up * @pt: list of tasks to be woken up * * Do the actual wake-up. @@ -754,7 +790,7 @@ } /** - * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks + * wake_const_ops - wake up non-alter tasks * @sma: semaphore array. * @semnum: semaphore that was modified. * @pt: list head for the tasks that must be woken up. @@ -787,8 +823,7 @@ q = container_of(walk, struct sem_queue, list); walk = walk->next; - error = perform_atomic_semop(sma, q->sops, q->nsops, - q->undo, q->pid); + error = perform_atomic_semop(sma, q); if (error <= 0) { /* operation completed, remove from queue & wakeup */ @@ -804,15 +839,14 @@ } /** - * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks + * do_smart_wakeup_zero - wakeup all wait for zero tasks * @sma: semaphore array * @sops: operations that were performed * @nsops: number of operations * @pt: list head of the tasks that must be woken up. * - * do_smart_wakeup_zero() checks all required queue for wait-for-zero - * operations, based on the actual changes that were performed on the - * semaphore array. + * Checks all required queue for wait-for-zero operations, based + * on the actual changes that were performed on the semaphore array. * The function returns 1 if at least one operation was completed successfully. */ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, @@ -856,7 +890,7 @@ /** - * update_queue(sma, semnum): Look for tasks that can be completed. + * update_queue - look for tasks that can be completed. * @sma: semaphore array. * @semnum: semaphore that was modified. * @pt: list head for the tasks that must be woken up. @@ -901,8 +935,7 @@ if (semnum != -1 && sma->sem_base[semnum].semval == 0) break; - error = perform_atomic_semop(sma, q->sops, q->nsops, - q->undo, q->pid); + error = perform_atomic_semop(sma, q); /* Does q->sleeper still need to sleep? */ if (error > 0) @@ -926,7 +959,7 @@ } /** - * set_semotime(sma, sops) - set sem_otime + * set_semotime - set sem_otime * @sma: semaphore array * @sops: operations that modified the array, may be NULL * @@ -944,7 +977,7 @@ } /** - * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue + * do_smart_update - optimized update_queue * @sma: semaphore array * @sops: operations that were performed * @nsops: number of operations @@ -997,65 +1030,74 @@ set_semotime(sma, sops); } -/* The following counts are associated to each semaphore: - * semncnt number of tasks waiting on semval being nonzero - * semzcnt number of tasks waiting on semval being zero - * This model assumes that a task waits on exactly one semaphore. - * Since semaphore operations are to be performed atomically, tasks actually - * wait on a whole sequence of semaphores simultaneously. - * The counts we return here are a rough approximation, but still - * warrant that semncnt+semzcnt>0 if the task is on the pending queue. - */ -static int count_semncnt (struct sem_array * sma, ushort semnum) -{ - int semncnt; - struct sem_queue * q; - - semncnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) { - struct sembuf * sops = q->sops; - BUG_ON(sops->sem_num != semnum); - if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) - semncnt++; - } +/* + * check_qop: Test if a queued operation sleeps on the semaphore semnum + */ +static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q, + bool count_zero) +{ + struct sembuf *sop = q->blocking; - list_for_each_entry(q, &sma->pending_alter, list) { - struct sembuf * sops = q->sops; - int nsops = q->nsops; - int i; - for (i = 0; i < nsops; i++) - if (sops[i].sem_num == semnum - && (sops[i].sem_op < 0) - && !(sops[i].sem_flg & IPC_NOWAIT)) - semncnt++; - } - return semncnt; + /* + * Linux always (since 0.99.10) reported a task as sleeping on all + * semaphores. This violates SUS, therefore it was changed to the + * standard compliant behavior. + * Give the administrators a chance to notice that an application + * might misbehave because it relies on the Linux behavior. + */ + pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n" + "The task %s (%d) triggered the difference, watch for misbehavior.\n", + current->comm, task_pid_nr(current)); + + if (sop->sem_num != semnum) + return 0; + + if (count_zero && sop->sem_op == 0) + return 1; + if (!count_zero && sop->sem_op < 0) + return 1; + + return 0; } -static int count_semzcnt (struct sem_array * sma, ushort semnum) +/* The following counts are associated to each semaphore: + * semncnt number of tasks waiting on semval being nonzero + * semzcnt number of tasks waiting on semval being zero + * + * Per definition, a task waits only on the semaphore of the first semop + * that cannot proceed, even if additional operation would block, too. + */ +static int count_semcnt(struct sem_array *sma, ushort semnum, + bool count_zero) { - int semzcnt; - struct sem_queue * q; + struct list_head *l; + struct sem_queue *q; + int semcnt; - semzcnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) { - struct sembuf * sops = q->sops; - BUG_ON(sops->sem_num != semnum); - if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) - semzcnt++; + semcnt = 0; + /* First: check the simple operations. They are easy to evaluate */ + if (count_zero) + l = &sma->sem_base[semnum].pending_const; + else + l = &sma->sem_base[semnum].pending_alter; + + list_for_each_entry(q, l, list) { + /* all task on a per-semaphore list sleep on exactly + * that semaphore + */ + semcnt++; } - list_for_each_entry(q, &sma->pending_const, list) { - struct sembuf * sops = q->sops; - int nsops = q->nsops; - int i; - for (i = 0; i < nsops; i++) - if (sops[i].sem_num == semnum - && (sops[i].sem_op == 0) - && !(sops[i].sem_flg & IPC_NOWAIT)) - semzcnt++; + /* Then: check the complex operations. */ + list_for_each_entry(q, &sma->pending_alter, list) { + semcnt += check_qop(sma, semnum, q, count_zero); + } + if (count_zero) { + list_for_each_entry(q, &sma->pending_const, list) { + semcnt += check_qop(sma, semnum, q, count_zero); + } } - return semzcnt; + return semcnt; } /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked @@ -1116,7 +1158,7 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) { - switch(version) { + switch (version) { case IPC_64: return copy_to_user(buf, in, sizeof(*in)); case IPC_OLD: @@ -1159,7 +1201,7 @@ int err; struct sem_array *sma; - switch(cmd) { + switch (cmd) { case IPC_INFO: case SEM_INFO: { @@ -1169,8 +1211,8 @@ err = security_sem_semctl(NULL, cmd); if (err) return err; - - memset(&seminfo,0,sizeof(seminfo)); + + memset(&seminfo, 0, sizeof(seminfo)); seminfo.semmni = ns->sc_semmni; seminfo.semmns = ns->sc_semmns; seminfo.semmsl = ns->sc_semmsl; @@ -1189,9 +1231,9 @@ } max_id = ipc_get_maxid(&sem_ids(ns)); up_read(&sem_ids(ns).rwsem); - if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) + if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) return -EFAULT; - return (max_id < 0) ? 0: max_id; + return (max_id < 0) ? 0 : max_id; } case IPC_STAT: case SEM_STAT: @@ -1247,7 +1289,7 @@ { struct sem_undo *un; struct sem_array *sma; - struct sem* curr; + struct sem *curr; int err; struct list_head tasks; int val; @@ -1290,7 +1332,7 @@ sem_lock(sma, NULL, -1); - if (sma->sem_perm.deleted) { + if (!ipc_valid_object(&sma->sem_perm)) { sem_unlock(sma, -1); rcu_read_unlock(); return -EIDRM; @@ -1317,10 +1359,10 @@ int cmd, void __user *p) { struct sem_array *sma; - struct sem* curr; + struct sem *curr; int err, nsems; ushort fast_sem_io[SEMMSL_FAST]; - ushort* sem_io = fast_sem_io; + ushort *sem_io = fast_sem_io; struct list_head tasks; INIT_LIST_HEAD(&tasks); @@ -1350,11 +1392,11 @@ int i; sem_lock(sma, NULL, -1); - if (sma->sem_perm.deleted) { + if (!ipc_valid_object(&sma->sem_perm)) { err = -EIDRM; goto out_unlock; } - if(nsems > SEMMSL_FAST) { + if (nsems > SEMMSL_FAST) { if (!ipc_rcu_getref(sma)) { err = -EIDRM; goto out_unlock; @@ -1362,14 +1404,14 @@ sem_unlock(sma, -1); rcu_read_unlock(); sem_io = ipc_alloc(sizeof(ushort)*nsems); - if(sem_io == NULL) { - ipc_rcu_putref(sma, ipc_rcu_free); + if (sem_io == NULL) { + ipc_rcu_putref(sma, sem_rcu_free); return -ENOMEM; } rcu_read_lock(); sem_lock_and_putref(sma); - if (sma->sem_perm.deleted) { + if (!ipc_valid_object(&sma->sem_perm)) { err = -EIDRM; goto out_unlock; } @@ -1379,7 +1421,7 @@ sem_unlock(sma, -1); rcu_read_unlock(); err = 0; - if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) + if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) err = -EFAULT; goto out_free; } @@ -1394,30 +1436,30 @@ } rcu_read_unlock(); - if(nsems > SEMMSL_FAST) { + if (nsems > SEMMSL_FAST) { sem_io = ipc_alloc(sizeof(ushort)*nsems); - if(sem_io == NULL) { - ipc_rcu_putref(sma, ipc_rcu_free); + if (sem_io == NULL) { + ipc_rcu_putref(sma, sem_rcu_free); return -ENOMEM; } } - if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { - ipc_rcu_putref(sma, ipc_rcu_free); + if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { + ipc_rcu_putref(sma, sem_rcu_free); err = -EFAULT; goto out_free; } for (i = 0; i < nsems; i++) { if (sem_io[i] > SEMVMX) { - ipc_rcu_putref(sma, ipc_rcu_free); + ipc_rcu_putref(sma, sem_rcu_free); err = -ERANGE; goto out_free; } } rcu_read_lock(); sem_lock_and_putref(sma); - if (sma->sem_perm.deleted) { + if (!ipc_valid_object(&sma->sem_perm)) { err = -EIDRM; goto out_unlock; } @@ -1443,7 +1485,7 @@ goto out_rcu_wakeup; sem_lock(sma, NULL, -1); - if (sma->sem_perm.deleted) { + if (!ipc_valid_object(&sma->sem_perm)) { err = -EIDRM; goto out_unlock; } @@ -1457,10 +1499,10 @@ err = curr->sempid; goto out_unlock; case GETNCNT: - err = count_semncnt(sma,semnum); + err = count_semcnt(sma, semnum, 0); goto out_unlock; case GETZCNT: - err = count_semzcnt(sma,semnum); + err = count_semcnt(sma, semnum, 1); goto out_unlock; } @@ -1470,7 +1512,7 @@ rcu_read_unlock(); wake_up_sem_queue_do(&tasks); out_free: - if(sem_io != fast_sem_io) + if (sem_io != fast_sem_io) ipc_free(sem_io, sizeof(ushort)*nsems); return err; } @@ -1478,7 +1520,7 @@ static inline unsigned long copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) { - switch(version) { + switch (version) { case IPC_64: if (copy_from_user(out, buf, sizeof(*out))) return -EFAULT; @@ -1487,7 +1529,7 @@ { struct semid_ds tbuf_old; - if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) + if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) return -EFAULT; out->sem_perm.uid = tbuf_old.sem_perm.uid; @@ -1514,7 +1556,7 @@ struct semid64_ds semid64; struct kern_ipc_perm *ipcp; - if(cmd == IPC_SET) { + if (cmd == IPC_SET) { if (copy_semid_from_user(&semid64, p, version)) return -EFAULT; } @@ -1574,7 +1616,7 @@ version = ipc_parse_version(&cmd); ns = current->nsproxy->ipc_ns; - switch(cmd) { + switch (cmd) { case IPC_INFO: case SEM_INFO: case IPC_STAT: @@ -1642,7 +1684,7 @@ { struct sem_undo *un; - assert_spin_locked(&ulp->lock); + assert_spin_locked(&ulp->lock); un = __lookup_undo(ulp, semid); if (un) { @@ -1653,7 +1695,7 @@ } /** - * find_alloc_undo - Lookup (and if not present create) undo array + * find_alloc_undo - lookup (and if not present create) undo array * @ns: namespace * @semid: semaphore array id * @@ -1678,7 +1720,7 @@ spin_lock(&ulp->lock); un = lookup_undo(ulp, semid); spin_unlock(&ulp->lock); - if (likely(un!=NULL)) + if (likely(un != NULL)) goto out; /* no undo structure around - allocate one. */ @@ -1700,14 +1742,14 @@ /* step 2: allocate new undo structure */ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { - ipc_rcu_putref(sma, ipc_rcu_free); + ipc_rcu_putref(sma, sem_rcu_free); return ERR_PTR(-ENOMEM); } /* step 3: Acquire the lock on semaphore array */ rcu_read_lock(); sem_lock_and_putref(sma); - if (sma->sem_perm.deleted) { + if (!ipc_valid_object(&sma->sem_perm)) { sem_unlock(sma, -1); rcu_read_unlock(); kfree(new); @@ -1743,7 +1785,7 @@ /** - * get_queue_result - Retrieve the result code from sem_queue + * get_queue_result - retrieve the result code from sem_queue * @q: Pointer to queue structure * * Retrieve the return code from the pending queue. If IN_WAKEUP is found in @@ -1773,7 +1815,7 @@ int error = -EINVAL; struct sem_array *sma; struct sembuf fast_sops[SEMOPM_FAST]; - struct sembuf* sops = fast_sops, *sop; + struct sembuf *sops = fast_sops, *sop; struct sem_undo *un; int undos = 0, alter = 0, max, locknum; struct sem_queue queue; @@ -1787,13 +1829,13 @@ return -EINVAL; if (nsops > ns->sc_semopm) return -E2BIG; - if(nsops > SEMOPM_FAST) { - sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); - if(sops==NULL) + if (nsops > SEMOPM_FAST) { + sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL); + if (sops == NULL) return -ENOMEM; } - if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) { - error=-EFAULT; + if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { + error = -EFAULT; goto out_free; } if (timeout) { @@ -1854,7 +1896,15 @@ error = -EIDRM; locknum = sem_lock(sma, sops, nsops); - if (sma->sem_perm.deleted) + /* + * We eventually might perform the following check in a lockless + * fashion, considering ipc_valid_object() locking constraints. + * If nsops == 1 and there is no contention for sem_perm.lock, then + * only a per-semaphore lock is held and it's OK to proceed with the + * check below. More details on the fine grained locking scheme + * entangled here and why it's RMID race safe on comments at sem_lock() + */ + if (!ipc_valid_object(&sma->sem_perm)) goto out_unlock_free; /* * semid identifiers are not unique - find_alloc_undo may have @@ -1866,8 +1916,13 @@ if (un && un->semid == -1) goto out_unlock_free; - error = perform_atomic_semop(sma, sops, nsops, un, - task_tgid_vnr(current)); + queue.sops = sops; + queue.nsops = nsops; + queue.undo = un; + queue.pid = task_tgid_vnr(current); + queue.alter = alter; + + error = perform_atomic_semop(sma, &queue); if (error == 0) { /* If the operation was successful, then do * the required updates. @@ -1883,12 +1938,6 @@ /* We need to sleep on this operation, so we put the current * task into the pending queue and go to sleep. */ - - queue.sops = sops; - queue.nsops = nsops; - queue.undo = un; - queue.pid = task_tgid_vnr(current); - queue.alter = alter; if (nsops == 1) { struct sem *curr; @@ -1922,7 +1971,7 @@ queue.sleeper = current; sleep_again: - current->state = TASK_INTERRUPTIBLE; + __set_current_state(TASK_INTERRUPTIBLE); sem_unlock(sma, locknum); rcu_read_unlock(); @@ -1967,10 +2016,8 @@ * If queue.status != -EINTR we are woken up by another process. * Leave without unlink_queue(), but with sem_unlock(). */ - - if (error != -EINTR) { + if (error != -EINTR) goto out_unlock_free; - } /* * If an interrupt occurred we have to clean up the queue @@ -1992,7 +2039,7 @@ rcu_read_unlock(); wake_up_sem_queue_do(&tasks); out_free: - if(sops != fast_sops) + if (sops != fast_sops) kfree(sops); return error; } @@ -2018,7 +2065,7 @@ return error; atomic_inc(&undo_list->refcnt); tsk->sysvsem.undo_list = undo_list; - } else + } else tsk->sysvsem.undo_list = NULL; return 0; @@ -2087,7 +2134,7 @@ sem_lock(sma, NULL, -1); /* exit_sem raced with IPC_RMID, nothing to do */ - if (sma->sem_perm.deleted) { + if (!ipc_valid_object(&sma->sem_perm)) { sem_unlock(sma, -1); rcu_read_unlock(); continue; @@ -2106,13 +2153,15 @@ ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - spin_lock(&ulp->lock); + /* we are the last process using this ulp, acquiring ulp->lock + * isn't required. Besides that, we are also protected against + * IPC_RMID as we hold sma->sem_perm lock now + */ list_del_rcu(&un->list_proc); - spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { - struct sem * semaphore = &sma->sem_base[i]; + struct sem *semaphore = &sma->sem_base[i]; if (un->semadj[i]) { semaphore->semval += un->semadj[i]; /* @@ -2126,7 +2175,7 @@ * Linux caps the semaphore value, both at 0 * and at SEMVMX. * - * Manfred + * Manfred */ if (semaphore->semval < 0) semaphore->semval = 0; @@ -2157,24 +2206,28 @@ /* * The proc interface isn't aware of sem_lock(), it calls * ipc_lock_object() directly (in sysvipc_find_ipc). - * In order to stay compatible with sem_lock(), we must wait until - * all simple semop() calls have left their critical regions. + * In order to stay compatible with sem_lock(), we must + * enter / leave complex_mode. */ - sem_wait_array(sma); + complexmode_enter(sma); sem_otime = get_semotime(sma); - return seq_printf(s, - "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", - sma->sem_perm.key, - sma->sem_perm.id, - sma->sem_perm.mode, - sma->sem_nsems, - from_kuid_munged(user_ns, sma->sem_perm.uid), - from_kgid_munged(user_ns, sma->sem_perm.gid), - from_kuid_munged(user_ns, sma->sem_perm.cuid), - from_kgid_munged(user_ns, sma->sem_perm.cgid), - sem_otime, - sma->sem_ctime); + seq_printf(s, + "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", + sma->sem_perm.key, + sma->sem_perm.id, + sma->sem_perm.mode, + sma->sem_nsems, + from_kuid_munged(user_ns, sma->sem_perm.uid), + from_kgid_munged(user_ns, sma->sem_perm.gid), + from_kuid_munged(user_ns, sma->sem_perm.cuid), + from_kgid_munged(user_ns, sma->sem_perm.cgid), + sem_otime, + sma->sem_ctime); + + complexmode_tryleave(sma); + + return 0; } #endif