Diffstat (limited to 'ipc/shm.c')
-rw-r--r--  ipc/shm.c  268
1 file changed, 178 insertions, 90 deletions
diff --git a/ipc/shm.c b/ipc/shm.c
index ce1ca9f7c6e9..7d86f058fb86 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -60,11 +60,20 @@ struct shmid_kernel /* private to the kernel */
time64_t shm_ctim;
struct pid *shm_cprid;
struct pid *shm_lprid;
- struct user_struct *mlock_user;
+ struct ucounts *mlock_ucounts;
- /* The task created the shm object. NULL if the task is dead. */
+ /*
+ * The task that created the shm object; for
+ * task_lock(shp->shm_creator)
+ */
struct task_struct *shm_creator;
- struct list_head shm_clist; /* list by creator */
+
+ /*
+ * List by creator. task_lock(->shm_creator) required for read/write.
+ * If list_empty(), then the creator is dead already.
+ */
+ struct list_head shm_clist;
+ struct ipc_namespace *ns;
} __randomize_layout;
/* shm_mode upper byte flags */
@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
struct shmid_kernel *shp;
shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+ WARN_ON(ns != shp->ns);
if (shp->shm_nattch) {
shp->shm_perm.mode |= SHM_DEST;
@@ -222,13 +232,46 @@ static void shm_rcu_free(struct rcu_head *head)
struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
shm_perm);
security_shm_free(&shp->shm_perm);
- kvfree(shp);
+ kfree(shp);
}
-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+/*
+ * It has to be called with shp locked.
+ * It must be called before ipc_rmid()
+ */
+static inline void shm_clist_rm(struct shmid_kernel *shp)
{
- list_del(&s->shm_clist);
- ipc_rmid(&shm_ids(ns), &s->shm_perm);
+ struct task_struct *creator;
+
+ /* ensure that shm_creator does not disappear */
+ rcu_read_lock();
+
+ /*
+ * A concurrent exit_shm may do a list_del_init() as well.
+ * Just do nothing if exit_shm already did the work
+ */
+ if (!list_empty(&shp->shm_clist)) {
+ /*
+ * shp->shm_creator is guaranteed to be valid *only*
+ * if shp->shm_clist is not empty.
+ */
+ creator = shp->shm_creator;
+
+ task_lock(creator);
+ /*
+ * list_del_init() is a nop if the entry was already removed
+ * from the list.
+ */
+ list_del_init(&shp->shm_clist);
+ task_unlock(creator);
+ }
+ rcu_read_unlock();
+}
+
+static inline void shm_rmid(struct shmid_kernel *s)
+{
+ shm_clist_rm(s);
+ ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
}
@@ -283,13 +326,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
shm_file = shp->shm_file;
shp->shm_file = NULL;
ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
- shm_rmid(ns, shp);
+ shm_rmid(shp);
shm_unlock(shp);
if (!is_file_hugepages(shm_file))
- shmem_lock(shm_file, 0, shp->mlock_user);
- else if (shp->mlock_user)
- user_shm_unlock(i_size_read(file_inode(shm_file)),
- shp->mlock_user);
+ shmem_lock(shm_file, 0, shp->mlock_ucounts);
fput(shm_file);
ipc_update_pid(&shp->shm_cprid, NULL);
ipc_update_pid(&shp->shm_lprid, NULL);
@@ -306,10 +346,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
*
* 2) sysctl kernel.shm_rmid_forced is set to 1.
*/
-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+static bool shm_may_destroy(struct shmid_kernel *shp)
{
return (shp->shm_nattch == 0) &&
- (ns->shm_rmid_forced ||
+ (shp->ns->shm_rmid_forced ||
(shp->shm_perm.mode & SHM_DEST));
}
@@ -340,7 +380,7 @@ static void shm_close(struct vm_area_struct *vma)
ipc_update_pid(&shp->shm_lprid, task_tgid(current));
shp->shm_dtim = ktime_get_real_seconds();
shp->shm_nattch--;
- if (shm_may_destroy(ns, shp))
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
@@ -361,10 +401,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
*
* As shp->* are changed under rwsem, it's safe to skip shp locking.
*/
- if (shp->shm_creator != NULL)
+ if (!list_empty(&shp->shm_clist))
return 0;
- if (shm_may_destroy(ns, shp)) {
+ if (shm_may_destroy(shp)) {
shm_lock_by_ptr(shp);
shm_destroy(ns, shp);
}
@@ -382,48 +422,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
/* Locking assumes this will only be called with task == current */
void exit_shm(struct task_struct *task)
{
- struct ipc_namespace *ns = task->nsproxy->ipc_ns;
- struct shmid_kernel *shp, *n;
+ for (;;) {
+ struct shmid_kernel *shp;
+ struct ipc_namespace *ns;
- if (list_empty(&task->sysvshm.shm_clist))
- return;
+ task_lock(task);
+
+ if (list_empty(&task->sysvshm.shm_clist)) {
+ task_unlock(task);
+ break;
+ }
+
+ shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
+ shm_clist);
- /*
- * If kernel.shm_rmid_forced is not set then only keep track of
- * which shmids are orphaned, so that a later set of the sysctl
- * can clean them up.
- */
- if (!ns->shm_rmid_forced) {
- down_read(&shm_ids(ns).rwsem);
- list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
- shp->shm_creator = NULL;
/*
- * Only under read lock but we are only called on current
- * so no entry on the list will be shared.
+ * 1) Get pointer to the ipc namespace. It is worth noting
+ * that this pointer is guaranteed to be valid because
+ * the shp lifetime is always shorter than the lifetime of
+ * the namespace in which shp lives.
+ * We have taken task_lock(), which means shp won't be freed.
*/
- list_del(&task->sysvshm.shm_clist);
- up_read(&shm_ids(ns).rwsem);
- return;
- }
+ ns = shp->ns;
- /*
- * Destroy all already created segments, that were not yet mapped,
- * and mark any mapped as orphan to cover the sysctl toggling.
- * Destroy is skipped if shm_may_destroy() returns false.
- */
- down_write(&shm_ids(ns).rwsem);
- list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
- shp->shm_creator = NULL;
+ /*
+ * 2) If kernel.shm_rmid_forced is not set then only keep track of
+ * which shmids are orphaned, so that a later set of the sysctl
+ * can clean them up.
+ */
+ if (!ns->shm_rmid_forced)
+ goto unlink_continue;
- if (shm_may_destroy(ns, shp)) {
- shm_lock_by_ptr(shp);
- shm_destroy(ns, shp);
+ /*
+ * 3) get a reference to the namespace.
+ * The refcount could already be 0. If it is 0, then
+ * the shm objects will be freed by free_ipc_work().
+ */
+ ns = get_ipc_ns_not_zero(ns);
+ if (!ns) {
+unlink_continue:
+ list_del_init(&shp->shm_clist);
+ task_unlock(task);
+ continue;
}
- }
- /* Remove the list head from any segments still attached. */
- list_del(&task->sysvshm.shm_clist);
- up_write(&shm_ids(ns).rwsem);
+ /*
+ * 4) get a reference to shp.
+ * This cannot fail: shm_clist_rm() is called before
+ * ipc_rmid(), thus the refcount cannot be 0.
+ */
+ WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
+
+ /*
+ * 5) unlink the shm segment from the list of segments
+ * created by current.
+ * This must be done last. After unlinking,
+ * only the refcounts obtained above prevent IPC_RMID
+ * from destroying the segment or the namespace.
+ */
+ list_del_init(&shp->shm_clist);
+
+ task_unlock(task);
+
+ /*
+ * 6) We have all references.
+ * Thus lock &, if needed, destroy shp.
+ */
+ down_write(&shm_ids(ns).rwsem);
+ shm_lock_by_ptr(shp);
+ /*
+ * rcu_read_lock was implicitly taken in shm_lock_by_ptr(), so it's
+ * safe to call ipc_rcu_putref here
+ */
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+
+ if (ipc_valid_object(&shp->shm_perm)) {
+ if (shm_may_destroy(shp))
+ shm_destroy(ns, shp);
+ else
+ shm_unlock(shp);
+ } else {
+ /*
+ * Someone else deleted the shp from the namespace
+ * idr/kht while we were waiting.
+ * Just unlock and continue.
+ */
+ shm_unlock(shp);
+ }
+
+ up_write(&shm_ids(ns).rwsem);
+ put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
+ }
}
static vm_fault_t shm_fault(struct vm_fault *vmf)
@@ -434,13 +523,13 @@ static vm_fault_t shm_fault(struct vm_fault *vmf)
return sfd->vm_ops->fault(vmf);
}
-static int shm_split(struct vm_area_struct *vma, unsigned long addr)
+static int shm_may_split(struct vm_area_struct *vma, unsigned long addr)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
- if (sfd->vm_ops->split)
- return sfd->vm_ops->split(vma, addr);
+ if (sfd->vm_ops->may_split)
+ return sfd->vm_ops->may_split(vma, addr);
return 0;
}
@@ -582,7 +671,7 @@ static const struct vm_operations_struct shm_vm_ops = {
.open = shm_open, /* callback for a new vm-area open */
.close = shm_close, /* callback for when the vm-area is released */
.fault = shm_fault,
- .split = shm_split,
+ .may_split = shm_may_split,
.pagesize = shm_pagesize,
#if defined(CONFIG_NUMA)
.set_policy = shm_set_policy,
@@ -619,18 +708,18 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
ns->shm_tot + numpages > ns->shm_ctlall)
return -ENOSPC;
- shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
+ shp = kmalloc(sizeof(*shp), GFP_KERNEL_ACCOUNT);
if (unlikely(!shp))
return -ENOMEM;
shp->shm_perm.key = key;
shp->shm_perm.mode = (shmflg & S_IRWXUGO);
- shp->mlock_user = NULL;
+ shp->mlock_ucounts = NULL;
shp->shm_perm.security = NULL;
error = security_shm_alloc(&shp->shm_perm);
if (error) {
- kvfree(shp);
+ kfree(shp);
return error;
}
@@ -650,8 +739,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if (shmflg & SHM_NORESERVE)
acctflag = VM_NORESERVE;
file = hugetlb_file_setup(name, hugesize, acctflag,
- &shp->mlock_user, HUGETLB_SHMFS_INODE,
- (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
+ HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
} else {
/*
* Do not allow no accounting for OVERCOMMIT_NEVER, even
@@ -680,7 +768,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if (error < 0)
goto no_id;
+ shp->ns = ns;
+
+ task_lock(current);
list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
+ task_unlock(current);
/*
* shmid gets reported as "inode#" in /proc/pid/maps.
@@ -698,8 +790,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
no_id:
ipc_update_pid(&shp->shm_cprid, NULL);
ipc_update_pid(&shp->shm_lprid, NULL);
- if (is_file_hugepages(file) && shp->mlock_user)
- user_shm_unlock(size, shp->mlock_user);
fput(file);
ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
return error;
@@ -711,8 +801,7 @@ no_file:
/*
* Called with shm_ids.rwsem and ipcp locked.
*/
-static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
- struct ipc_params *params)
+static int shm_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
{
struct shmid_kernel *shp;
@@ -1106,12 +1195,12 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
goto out_unlock0;
if (cmd == SHM_LOCK) {
- struct user_struct *user = current_user();
+ struct ucounts *ucounts = current_ucounts();
- err = shmem_lock(shm_file, 1, user);
+ err = shmem_lock(shm_file, 1, ucounts);
if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
shp->shm_perm.mode |= SHM_LOCKED;
- shp->mlock_user = user;
+ shp->mlock_ucounts = ucounts;
}
goto out_unlock0;
}
@@ -1119,9 +1208,9 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
/* SHM_UNLOCK */
if (!(shp->shm_perm.mode & SHM_LOCKED))
goto out_unlock0;
- shmem_lock(shm_file, 0, shp->mlock_user);
+ shmem_lock(shm_file, 0, shp->mlock_ucounts);
shp->shm_perm.mode &= ~SHM_LOCKED;
- shp->mlock_user = NULL;
+ shp->mlock_ucounts = NULL;
get_file(shm_file);
ipc_unlock_object(&shp->shm_perm);
rcu_read_unlock();
@@ -1180,7 +1269,7 @@ static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int ver
case IPC_SET:
if (copy_shmid_from_user(&sem64, buf, version))
return -EFAULT;
- /* fallthru */
+ fallthrough;
case IPC_RMID:
return shmctl_down(ns, shmid, cmd, &sem64);
case SHM_LOCK:
@@ -1332,7 +1421,7 @@ static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
}
}
-long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
+static long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
{
struct ipc_namespace *ns;
struct shmid64_ds sem64;
@@ -1375,13 +1464,12 @@ long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
case IPC_SET:
if (copy_compat_shmid_from_user(&sem64, uptr, version))
return -EFAULT;
- /* fallthru */
+ fallthrough;
case IPC_RMID:
return shmctl_down(ns, shmid, cmd, &sem64);
case SHM_LOCK:
case SHM_UNLOCK:
return shmctl_do_lock(ns, shmid, cmd);
- break;
default:
return -EINVAL;
}
@@ -1544,7 +1632,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
if (err)
goto out_fput;
- if (down_write_killable(&current->mm->mmap_sem)) {
+ if (mmap_write_lock_killable(current->mm)) {
err = -EINTR;
goto out_fput;
}
@@ -1558,13 +1646,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
goto invalid;
}
- addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
+ addr = do_mmap(file, addr, size, prot, flags, 0, &populate, NULL);
*raddr = addr;
err = 0;
if (IS_ERR_VALUE(addr))
err = (long)addr;
invalid:
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
if (populate)
mm_populate(addr, populate);
@@ -1575,7 +1663,8 @@ out_nattch:
down_write(&shm_ids(ns).rwsem);
shp = shm_lock(ns, shmid);
shp->shm_nattch--;
- if (shm_may_destroy(ns, shp))
+
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
else
shm_unlock(shp);
@@ -1632,13 +1721,13 @@ long ksys_shmdt(char __user *shmaddr)
#ifdef CONFIG_MMU
loff_t size = 0;
struct file *file;
- struct vm_area_struct *next;
+ VMA_ITERATOR(vmi, mm, addr);
#endif
if (addr & ~PAGE_MASK)
return retval;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
/*
@@ -1662,12 +1751,9 @@ long ksys_shmdt(char __user *shmaddr)
* match the usual checks anyway. So assume all vma's are
* above the starting address given.
*/
- vma = find_vma(mm, addr);
#ifdef CONFIG_MMU
- while (vma) {
- next = vma->vm_next;
-
+ for_each_vma(vmi, vma) {
/*
* Check if the starting address would match, i.e. it's
* a fragment created by mprotect() and/or munmap(), or it
@@ -1685,6 +1771,7 @@ long ksys_shmdt(char __user *shmaddr)
file = vma->vm_file;
size = i_size_read(file_inode(vma->vm_file));
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+ mas_pause(&vmi.mas);
/*
* We discovered the size of the shm segment, so
* break out of here and fall through to the next
@@ -1692,10 +1779,9 @@ long ksys_shmdt(char __user *shmaddr)
* searching for matching vma's.
*/
retval = 0;
- vma = next;
+ vma = vma_next(&vmi);
break;
}
- vma = next;
}
/*
@@ -1705,17 +1791,19 @@ long ksys_shmdt(char __user *shmaddr)
*/
size = PAGE_ALIGN(size);
while (vma && (loff_t)(vma->vm_end - addr) <= size) {
- next = vma->vm_next;
-
/* finding a matching vma now does not alter retval */
if ((vma->vm_ops == &shm_vm_ops) &&
((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
- (vma->vm_file == file))
+ (vma->vm_file == file)) {
do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
- vma = next;
+ mas_pause(&vmi.mas);
+ }
+
+ vma = vma_next(&vmi);
}
#else /* CONFIG_MMU */
+ vma = vma_lookup(mm, addr);
/* under NOMMU conditions, the exact address to be destroyed must be
* given
*/
@@ -1726,7 +1814,7 @@ long ksys_shmdt(char __user *shmaddr)
#endif
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return retval;
}
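
A note on the core idea (editor's illustration, not part of the patch): shm_clist
membership is now protected by task_lock() of the creating task, and
list_del_init()/list_empty() double as the "creator has already exited" marker, so
shm_destroy()/IPC_RMID and exit_shm() can race without ever dereferencing a dead
task_struct. The standalone userspace sketch below models only that protocol; the
names fake_task, fake_shm and the model_* helpers are invented for this example, a
pthread mutex stands in for task_lock(), and the RCU protection of shm_creator as
well as the namespace/refcount handling of steps 3)-6) in exit_shm() are
deliberately left out.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h; h->prev = h; }
static bool list_empty(const struct list_head *h) { return h->next == h; }
static void list_add(struct list_head *entry, struct list_head *head)
{
	entry->next = head->next;
	entry->prev = head;
	head->next->prev = entry;
	head->next = entry;
}
static void list_del_init(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
	INIT_LIST_HEAD(entry);	/* re-init, so a later list_empty() reads "detached" */
}

struct fake_task {			/* stands in for task_struct */
	pthread_mutex_t lock;		/* stands in for task_lock()/task_unlock() */
	struct list_head shm_clist;
};

struct fake_shm {			/* stands in for shmid_kernel */
	struct list_head shm_clist;	/* first member, so the cast in exit_shm works */
	struct fake_task *shm_creator;
};

/* newseg(): link the new segment to its creator, under the creator's lock */
static void model_newseg(struct fake_task *creator, struct fake_shm *shp)
{
	shp->shm_creator = creator;
	pthread_mutex_lock(&creator->lock);
	list_add(&shp->shm_clist, &creator->shm_clist);
	pthread_mutex_unlock(&creator->lock);
}

/* shm_clist_rm(): IPC_RMID/shm_destroy path, called before the id is freed */
static void model_clist_rm(struct fake_shm *shp)
{
	if (!list_empty(&shp->shm_clist)) {	/* creator not gone yet? */
		struct fake_task *creator = shp->shm_creator;

		pthread_mutex_lock(&creator->lock);
		list_del_init(&shp->shm_clist);	/* nop if exit_shm() raced with us */
		pthread_mutex_unlock(&creator->lock);
	}
}

/* exit_shm(): the dying creator detaches every segment still linked to it */
static void model_exit_shm(struct fake_task *task)
{
	pthread_mutex_lock(&task->lock);
	while (!list_empty(&task->shm_clist)) {
		struct fake_shm *shp = (struct fake_shm *)task->shm_clist.next;

		list_del_init(&shp->shm_clist);	/* from now on: "creator is dead" */
	}
	pthread_mutex_unlock(&task->lock);
}

int main(void)
{
	struct fake_task task = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct fake_shm seg = { .shm_creator = NULL };

	INIT_LIST_HEAD(&task.shm_clist);
	model_newseg(&task, &seg);
	model_exit_shm(&task);		/* creator exits first...             */
	model_clist_rm(&seg);		/* ...and a later IPC_RMID stays safe */
	printf("segment detached from creator: %d\n", list_empty(&seg.shm_clist));
	return 0;
}

Whichever of model_exit_shm() and model_clist_rm() runs second finds the entry
already re-initialized and does nothing, which is the property the patch relies on
when IPC_RMID races with the creator's exit.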