• Serge E. Hallyn's avatar
    cgroup_clone: use pid of newly created task for new cgroup · e885dcde
    Serge E. Hallyn authored
    
    cgroup_clone creates a new cgroup with the pid of the task.  This works
    correctly for unshare, but for clone cgroup_clone is called from
    copy_namespaces inside copy_process, which happens before the new pid is
    created.  As a result, the new cgroup was created with current's pid.
    This patch:
    
    	1. Moves the call inside copy_process to after the new pid
    	   is created
    	2. Passes the struct pid into ns_cgroup_clone (as it is not
    	   yet attached to the task)
    	3. Passes a name from ns_cgroup_clone() into cgroup_clone()
    	   so as to keep cgroup_clone() itself simpler
    	4. Uses pid_vnr() to get the process id value, so that the
    	   pid used to name the new cgroup is always the pid as it
    	   would be known to the task which did the cloning or
    	   unsharing.  I think that is the most intuitive thing to
    	   do.  This way, task t1 does clone(CLONE_NEWPID) to get
    	   t2, which does clone(CLONE_NEWPID) to get t3, then the
    	   cgroup for t3 will be named for the pid by which t2 knows
    	   t3.
    
    (Thanks to Dan Smith for finding the main bug)
    
    Changelog:
    	June 11: Incorporate Paul Menage's feedback:  don't pass
    	         NULL to ns_cgroup_clone from unshare, and reduce
    		 patch size by using 'nodename' in cgroup_clone.
    	June 10: Original version
    
    [akpm@linux-foundation.org: build fix]
    [akpm@linux-foundation.org: coding-style fixes]
    Signed-off-by: Serge Hallyn <serge@us.ibm.com>
    Acked-by: Paul Menage <menage@google.com>
    Tested-by: Dan Smith <danms@us.ibm.com>
    Cc: Balbir Singh <balbir@in.ibm.com>
    Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
    e885dcde
ns_cgroup.c 2.35 KB
/*
 * ns_cgroup.c - namespace cgroup subsystem
 *
 * Copyright 2006, 2007 IBM Corp
 */

#include <linux/module.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>

/*
 * Per-cgroup state of the "ns" subsystem.
 *
 * Instances are allocated in ns_create() and released in ns_destroy().
 */
struct ns_cgroup {
	/* Embedded generic state; cgroup_to_ns() maps it back to the container. */
	struct cgroup_subsys_state css;
	/* Initialized in ns_create(); nothing in this file acquires it. */
	spinlock_t lock;
};

/*
 * Declared ahead of its initializer at the bottom of the file so that
 * ns_cgroup_clone() can take its address.
 */
struct cgroup_subsys ns_subsys;

/*
 * Map @cgroup to the ns_cgroup that embeds its subsystem state for the
 * ns subsystem (looked up via ns_subsys_id).
 */
static inline struct ns_cgroup *cgroup_to_ns(struct cgroup *cgroup)
{
	struct cgroup_subsys_state *state;

	state = cgroup_subsys_state(cgroup, ns_subsys_id);

	return container_of(state, struct ns_cgroup, css);
}

/*
 * ns_cgroup_clone - create a new ns cgroup for @task, named after @pid
 * @task: task handed through to cgroup_clone()
 * @pid:  pid whose number becomes the cgroup name
 *
 * The name is the decimal value of pid_vnr(@pid).  @pid is a separate
 * argument because, on the clone path, the new pid is not yet attached to
 * the task when this is called.
 *
 * Returns whatever cgroup_clone() returns.
 */
int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
{
	char nodename[PROC_NUMBUF];

	snprintf(nodename, sizeof(nodename), "%d", pid_vnr(pid));

	return cgroup_clone(task, &ns_subsys, nodename);
}

/*
 * Rules:
 *   1. you can only enter a cgroup which is a child of your current
 *     cgroup
 *   2. you can only place another process into a cgroup if
 *     a. you have CAP_SYS_ADMIN
 *     b. your cgroup is an ancestor of task's destination cgroup
 *       (hence either you are in the same cgroup as task, or in an
 *        ancestor cgroup thereof)
 */
static int ns_can_attach(struct cgroup_subsys *ss,
		struct cgroup *new_cgroup, struct task_struct *task)
{
	struct cgroup *orig;

	if (current != task) {
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (!cgroup_is_descendant(new_cgroup))
			return -EPERM;
	}

	if (atomic_read(&new_cgroup->count) != 0)
		return -EPERM;

	orig = task_cgroup(task, ns_subsys_id);
	if (orig && orig != new_cgroup->parent)
		return -EPERM;

	return 0;
}

/*
 * Rules: you can only create a cgroup if
 *     1. you are capable(CAP_SYS_ADMIN)
 *     2. the target cgroup is a descendant of your own cgroup
 */
static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
						struct cgroup *cgroup)
{
	struct ns_cgroup *ns_cgroup;

	if (!capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);
	if (!cgroup_is_descendant(cgroup))
		return ERR_PTR(-EPERM);

	ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
	if (!ns_cgroup)
		return ERR_PTR(-ENOMEM);
	spin_lock_init(&ns_cgroup->lock);
	return &ns_cgroup->css;
}

/*
 * ns_destroy - free the ns subsystem state attached to @cgroup
 * @ss:     unused here
 * @cgroup: cgroup whose ns_cgroup (allocated by ns_create()) is released
 */
static void ns_destroy(struct cgroup_subsys *ss,
			struct cgroup *cgroup)
{
	kfree(cgroup_to_ns(cgroup));
}

/* Descriptor for the "ns" subsystem: its name, id and callbacks. */
struct cgroup_subsys ns_subsys = {
	.name		= "ns",
	.subsys_id	= ns_subsys_id,
	.create		= ns_create,
	.destroy	= ns_destroy,
	.can_attach	= ns_can_attach,
};