diff mbox

[1/6,v5] Kernel DLPAR Infrastructure

Message ID 4AE8AF4D.4030403@austin.ibm.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Nathan Fontenot Oct. 28, 2009, 8:53 p.m. UTC
This patch provides the kernel DLPAR infrastructure in a new filed named
dlpar.c.  The functionality provided is for acquiring and releasing a resource
from firmware and the parsing of information returned from the
ibm,configure-connector rtas call.  Additionally this exports the pSeries
reconfiguration notifier chain so that it can be invoked when device tree 
updates are made.

Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com> 
---

Comments

Benjamin Herrenschmidt Oct. 29, 2009, 3:08 a.m. UTC | #1
On Wed, 2009-10-28 at 15:53 -0500, Nathan Fontenot wrote:
> This patch provides the kernel DLPAR infrastructure in a new filed named
> dlpar.c.  The functionality provided is for acquiring and releasing a resource
> from firmware and the parsing of information returned from the
> ibm,configure-connector rtas call.  Additionally this exports the pSeries
> reconfiguration notifier chain so that it can be invoked when device tree 
> updates are made.
> 
> Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com> 
> ---

Hi Nathan !

Finally I get to review this stuff :-)

> +#define CFG_CONN_WORK_SIZE	4096
> +static char workarea[CFG_CONN_WORK_SIZE];
> +static DEFINE_SPINLOCK(workarea_lock);

So I'm not a huge fan of this workarea static. First a static is in
effect a global name (as far as System.map etc... are concerned) so it
would warrant a better name. Then, do we really want that 4K of BSS
taken even on platforms that don't do dlpar ? Any reason why you don't
just pop a free page with __get_free_page() inside of
configure_connector() ?

> +struct cc_workarea {
> +	u32	drc_index;
> +	u32	zero;
> +	u32	name_offset;
> +	u32	prop_length;
> +	u32	prop_offset;
> +};
> +
> +static struct property *parse_cc_property(char *workarea)
> +{
> +	struct property *prop;
> +	struct cc_workarea *ccwa;
> +	char *name;
> +	char *value;
> +
> +	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
> +	if (!prop)
> +		return NULL;
> +
> +	ccwa = (struct cc_workarea *)workarea;
> +	name = workarea + ccwa->name_offset;
> +	prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
> +	if (!prop->name) {
> +		kfree(prop);
> +		return NULL;
> +	}
> +
> +	strcpy(prop->name, name);
> +
> +	prop->length = ccwa->prop_length;
> +	value = workarea + ccwa->prop_offset;
> +	prop->value = kzalloc(prop->length, GFP_KERNEL);
> +	if (!prop->value) {
> +		kfree(prop->name);
> +		kfree(prop);
> +		return NULL;
> +	}
> +
> +	memcpy(prop->value, value, prop->length);
> +	return prop;
> +}
> +
> +static void free_property(struct property *prop)
> +{
> +	kfree(prop->name);
> +	kfree(prop->value);
> +	kfree(prop);
> +}
> +
> +static struct device_node *parse_cc_node(char *work_area)
> +{

const char* maybe ?

> +	struct device_node *dn;
> +	struct cc_workarea *ccwa;
> +	char *name;
> +
> +	dn = kzalloc(sizeof(*dn), GFP_KERNEL);
> +	if (!dn)
> +		return NULL;
> +
> +	ccwa = (struct cc_workarea *)work_area;
> +	name = work_area + ccwa->name_offset;

I'm wondering whether work_area should be a struct cc_workarea * in the
first place with a char data[] at the end, but that would mean probably
tweaking the offsets... no big deal, up to you.

> +	dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
> +	if (!dn->full_name) {
> +		kfree(dn);
> +		return NULL;
> +	}
> +
> +	strcpy(dn->full_name, name);

kstrdup ?

 .../...

> +#define NEXT_SIBLING    1
> +#define NEXT_CHILD      2
> +#define NEXT_PROPERTY   3
> +#define PREV_PARENT     4
> +#define MORE_MEMORY     5
> +#define CALL_AGAIN	-2
> +#define ERR_CFG_USE     -9003
> +
> +struct device_node *configure_connector(u32 drc_index)
> +{

It's a global exported function, I'd rather you call it
dlpar_configure_connector()

> +	struct device_node *dn;
> +	struct device_node *first_dn = NULL;
> +	struct device_node *last_dn = NULL;
> +	struct property *property;
> +	struct property *last_property = NULL;
> +	struct cc_workarea *ccwa;
> +	int cc_token;
> +	int rc;
> +
> +	cc_token = rtas_token("ibm,configure-connector");
> +	if (cc_token == RTAS_UNKNOWN_SERVICE)
> +		return NULL;
> +
> +	spin_lock(&workarea_lock);
> +
> +	ccwa = (struct cc_workarea *)&workarea[0];
> +	ccwa->drc_index = drc_index;
> +	ccwa->zero = 0;

Popping a free page with gfp (or just kmalloc'ing 4K) would avoid the
need for the lock too.

> +	rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
> +	while (rc) {
> +		switch (rc) {
> +		case NEXT_SIBLING:
> +			dn = parse_cc_node(workarea);
> +			if (!dn)
> +				goto cc_error;
> +
> +			dn->parent = last_dn->parent;
> +			last_dn->sibling = dn;
> +			last_dn = dn;
> +			break;
> +
> +		case NEXT_CHILD:
> +			dn = parse_cc_node(workarea);
> +			if (!dn)
> +				goto cc_error;
> +
> +			if (!first_dn)
> +				first_dn = dn;
> +			else {
> +				dn->parent = last_dn;
> +				if (last_dn)
> +					last_dn->child = dn;
> +			}
> +
> +			last_dn = dn;
> +			break;
> +
> +		case NEXT_PROPERTY:
> +			property = parse_cc_property(workarea);
> +			if (!property)
> +				goto cc_error;
> +
> +			if (!last_dn->properties)
> +				last_dn->properties = property;
> +			else
> +				last_property->next = property;
> +
> +			last_property = property;
> +			break;
> +
> +		case PREV_PARENT:
> +			last_dn = last_dn->parent;
> +			break;
> +
> +		case CALL_AGAIN:
> +			break;
> +
> +		case MORE_MEMORY:
> +		case ERR_CFG_USE:
> +		default:
> +			printk(KERN_ERR "Unexpected Error (%d) "
> +			       "returned from configure-connector\n", rc);
> +			goto cc_error;
> +		}
> +
> +		rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
> +	}
> +
> +	spin_unlock(&workarea_lock);
> +	return first_dn;
> +
> +cc_error:
> +	spin_unlock(&workarea_lock);
> +
> +	if (first_dn)
> +		free_cc_nodes(first_dn);
> +
> +	return NULL;
> +}
> +
> +static struct device_node *derive_parent(const char *path)
> +{
> +	struct device_node *parent;
> +	char parent_path[128];
> +	int parent_path_len;
> +
> +	parent_path_len = strrchr(path, '/') - path + 1;
> +	strlcpy(parent_path, path, parent_path_len);
> +
> +	parent = of_find_node_by_path(parent_path);
> +
> +	return parent;
> +}

This ...

> +static int add_one_node(struct device_node *dn)
> +{
> +	struct proc_dir_entry *ent;
> +	int rc;
> +
> +	of_node_set_flag(dn, OF_DYNAMIC);
> +	kref_init(&dn->kref);
> +	dn->parent = derive_parent(dn->full_name);
> +
> +	rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
> +					  PSERIES_RECONFIG_ADD, dn);
> +	if (rc == NOTIFY_BAD) {
> +		printk(KERN_ERR "Failed to add device node %s\n",
> +		       dn->full_name);
> +		return -ENOMEM; /* For now, safe to assume kmalloc failure */
> +	}
> +
> +	of_attach_node(dn);
> +
> +#ifdef CONFIG_PROC_DEVICETREE
> +	ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
> +	if (ent)
> +		proc_device_tree_add_node(dn, ent);
> +#endif
> +
> +	of_node_put(dn->parent);
> +	return 0;
> +}

 ... and this ...

> +int add_device_tree_nodes(struct device_node *dn)
> +{
> +	struct device_node *child = dn->child;
> +	struct device_node *sibling = dn->sibling;
> +	int rc;
> +
> +	dn->child = NULL;
> +	dn->sibling = NULL;
> +	dn->parent = NULL;
> +
> +	rc = add_one_node(dn);
> +	if (rc)
> +		return rc;
> +
> +	if (child) {
> +		rc = add_device_tree_nodes(child);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	if (sibling)
> +		rc = add_device_tree_nodes(sibling);
> +
> +	return rc;
> +}

 ... and this ...

> +static int remove_one_node(struct device_node *dn)
> +{
> +	struct device_node *parent = dn->parent;
> +	struct property *prop = dn->properties;
> +
> +#ifdef CONFIG_PROC_DEVICETREE
> +	while (prop) {
> +		remove_proc_entry(prop->name, dn->pde);
> +		prop = prop->next;
> +	}
> +
> +	if (dn->pde)
> +		remove_proc_entry(dn->pde->name, parent->pde);
> +#endif
> +
> +	blocking_notifier_call_chain(&pSeries_reconfig_chain,
> +			    PSERIES_RECONFIG_REMOVE, dn);
> +	of_detach_node(dn);
> +	of_node_put(dn); /* Must decrement the refcount */
> +
> +	return 0;
> +}

 ... and this ...

> +static int _remove_device_tree_nodes(struct device_node *dn)
> +{
> +	int rc;
> +
> +	if (dn->child) {
> +		rc = _remove_device_tree_nodes(dn->child);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	if (dn->sibling) {
> +		rc = _remove_device_tree_nodes(dn->sibling);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	rc = remove_one_node(dn);
> +	return rc;
> +}

 ... repeat myself ...

> +int remove_device_tree_nodes(struct device_node *dn)
> +{
> +	int rc;
> +
> +	if (dn->child) {
> +		rc = _remove_device_tree_nodes(dn->child);
> +		if (rc)
> +			return rc;
> +	}
> +
> +	rc = remove_one_node(dn);
> +	return rc;
> +}

 ... should probably all go to something like drivers/of/dynamic.c or at
least for now arch/powerpc/kernel/of_dynamic.c along with everything
related to dynamically adding and removing nodes. I see that potentially
useful for more than just DLPAR (though DLPAR is the only user right
now) and should also all be prefixed with of_*

> +#define DR_ENTITY_SENSE		9003
> +#define DR_ENTITY_PRESENT	1
> +#define DR_ENTITY_UNUSABLE	2
> +#define ALLOCATION_STATE	9003
> +#define ALLOC_UNUSABLE		0
> +#define ALLOC_USABLE		1
> +#define ISOLATION_STATE		9001
> +#define ISOLATE			0
> +#define UNISOLATE		1
> +
> +int acquire_drc(u32 drc_index)
> +{
> +	int dr_status, rc;
> +
> +	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
> +		       DR_ENTITY_SENSE, drc_index);
> +	if (rc || dr_status != DR_ENTITY_UNUSABLE)
> +		return -1;
> +
> +	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
> +	if (rc)
> +		return rc;
> +
> +	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
> +	if (rc) {
> +		rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
> +		return rc;
> +	}
> +
> +	return 0;
> +}
> +
> +int release_drc(u32 drc_index)
> +{
> +	int dr_status, rc;
> +
> +	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
> +		       DR_ENTITY_SENSE, drc_index);
> +	if (rc || dr_status != DR_ENTITY_PRESENT)
> +		return -1;
> +
> +	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
> +	if (rc)
> +		return rc;
> +
> +	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
> +	if (rc) {
> +		rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
> +		return rc;
> +	}
> +
> +	return 0;
> +}

Both above should have a dlpar_* prefix

> +static int pseries_dlpar_init(void)
> +{
> +	if (!machine_is(pseries))
> +		return 0;
> +
> +	return 0;
> +}
> +device_initcall(pseries_dlpar_init);

What the point ? :-)

Cheers
Ben.
Nathan Lynch Oct. 29, 2009, 3:59 a.m. UTC | #2
On Thu, 2009-10-29 at 14:08 +1100, Benjamin Herrenschmidt wrote:
> On Wed, 2009-10-28 at 15:53 -0500, Nathan Fontenot wrote:
> > +	struct device_node *dn;
> > +	struct device_node *first_dn = NULL;
> > +	struct device_node *last_dn = NULL;
> > +	struct property *property;
> > +	struct property *last_property = NULL;
> > +	struct cc_workarea *ccwa;
> > +	int cc_token;
> > +	int rc;
> > +
> > +	cc_token = rtas_token("ibm,configure-connector");
> > +	if (cc_token == RTAS_UNKNOWN_SERVICE)
> > +		return NULL;
> > +
> > +	spin_lock(&workarea_lock);
> > +
> > +	ccwa = (struct cc_workarea *)&workarea[0];
> > +	ccwa->drc_index = drc_index;
> > +	ccwa->zero = 0;
> 
> Popping a free page with gfp (or just kmalloc'ing 4K) would avoid the
> need for the lock too.

Not kmalloc -- the alignment of the buffer isn't guaranteed when
slub/slab debug is on, and iirc  the work area needs to be 4K-aligned.
__get_free_page should be fine, I think.
Nathan Fontenot Nov. 2, 2009, 4:27 p.m. UTC | #3
Benjamin Herrenschmidt wrote:
> On Wed, 2009-10-28 at 15:53 -0500, Nathan Fontenot wrote:
>> This patch provides the kernel DLPAR infrastructure in a new filed named
>> dlpar.c.  The functionality provided is for acquiring and releasing a resource
>> from firmware and the parsing of information returned from the
>> ibm,configure-connector rtas call.  Additionally this exports the pSeries
>> reconfiguration notifier chain so that it can be invoked when device tree 
>> updates are made.
>>
>> Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com> 
>> ---
> 
> Hi Nathan !
> 
> Finally I get to review this stuff :-)
> 

Thanks!

>> +#define CFG_CONN_WORK_SIZE	4096
>> +static char workarea[CFG_CONN_WORK_SIZE];
>> +static DEFINE_SPINLOCK(workarea_lock);
> 
> So I'm not a huge fan of this workarea static. First a static is in
> effect a global name (as far as System.map etc... are concerned) so it
> would warrant a better name. Then, do we really want that 4K of BSS
> taken even on platforms that don't do dlpar ? Any reason why you don't
> just pop a free page with __get_free_page() inside of
> configure_connector() ?
> 

I'm not either, having a static buffer and a lock feels like overkill
for this.  I tried kmalloc, but that didn't work.  I'll try using
__get_free_page.

>> +struct cc_workarea {
>> +	u32	drc_index;
>> +	u32	zero;
>> +	u32	name_offset;
>> +	u32	prop_length;
>> +	u32	prop_offset;
>> +};
>> +
>> +static struct property *parse_cc_property(char *workarea)
>> +{
>> +	struct property *prop;
>> +	struct cc_workarea *ccwa;
>> +	char *name;
>> +	char *value;
>> +
>> +	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
>> +	if (!prop)
>> +		return NULL;
>> +
>> +	ccwa = (struct cc_workarea *)workarea;
>> +	name = workarea + ccwa->name_offset;
>> +	prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
>> +	if (!prop->name) {
>> +		kfree(prop);
>> +		return NULL;
>> +	}
>> +
>> +	strcpy(prop->name, name);
>> +
>> +	prop->length = ccwa->prop_length;
>> +	value = workarea + ccwa->prop_offset;
>> +	prop->value = kzalloc(prop->length, GFP_KERNEL);
>> +	if (!prop->value) {
>> +		kfree(prop->name);
>> +		kfree(prop);
>> +		return NULL;
>> +	}
>> +
>> +	memcpy(prop->value, value, prop->length);
>> +	return prop;
>> +}
>> +
>> +static void free_property(struct property *prop)
>> +{
>> +	kfree(prop->name);
>> +	kfree(prop->value);
>> +	kfree(prop);
>> +}
>> +
>> +static struct device_node *parse_cc_node(char *work_area)
>> +{
> 
> const char* maybe ?

sure.

> 
>> +	struct device_node *dn;
>> +	struct cc_workarea *ccwa;
>> +	char *name;
>> +
>> +	dn = kzalloc(sizeof(*dn), GFP_KERNEL);
>> +	if (!dn)
>> +		return NULL;
>> +
>> +	ccwa = (struct cc_workarea *)work_area;
>> +	name = work_area + ccwa->name_offset;
> 
> I'm wondering whether work_area should be a struct cc_workarea * in the
> first place with a char data[] at the end, but that would mean probably
> tweaking the offsets... no big deal, up to you.
>

I'll look onto that.  Anything that makes this easier to understand is good.

 
>> +	dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
>> +	if (!dn->full_name) {
>> +		kfree(dn);
>> +		return NULL;
>> +	}
>> +
>> +	strcpy(dn->full_name, name);
> 
> kstrdup ?

yep, should have used kstrdup.

> 
>  .../...
> 
>> +#define NEXT_SIBLING    1
>> +#define NEXT_CHILD      2
>> +#define NEXT_PROPERTY   3
>> +#define PREV_PARENT     4
>> +#define MORE_MEMORY     5
>> +#define CALL_AGAIN	-2
>> +#define ERR_CFG_USE     -9003
>> +
>> +struct device_node *configure_connector(u32 drc_index)
>> +{
> 
> It's a global exported function, I'd rather you call it
> dlpar_configure_connector()
>

ok.
 
>> +	struct device_node *dn;
>> +	struct device_node *first_dn = NULL;
>> +	struct device_node *last_dn = NULL;
>> +	struct property *property;
>> +	struct property *last_property = NULL;
>> +	struct cc_workarea *ccwa;
>> +	int cc_token;
>> +	int rc;
>> +
>> +	cc_token = rtas_token("ibm,configure-connector");
>> +	if (cc_token == RTAS_UNKNOWN_SERVICE)
>> +		return NULL;
>> +
>> +	spin_lock(&workarea_lock);
>> +
>> +	ccwa = (struct cc_workarea *)&workarea[0];
>> +	ccwa->drc_index = drc_index;
>> +	ccwa->zero = 0;
> 
> Popping a free page with gfp (or just kmalloc'ing 4K) would avoid the
> need for the lock too.

yes, see comment at beginning.

> 
>> +	rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
>> +	while (rc) {
>> +		switch (rc) {
>> +		case NEXT_SIBLING:
>> +			dn = parse_cc_node(workarea);
>> +			if (!dn)
>> +				goto cc_error;
>> +
>> +			dn->parent = last_dn->parent;
>> +			last_dn->sibling = dn;
>> +			last_dn = dn;
>> +			break;
>> +
>> +		case NEXT_CHILD:
>> +			dn = parse_cc_node(workarea);
>> +			if (!dn)
>> +				goto cc_error;
>> +
>> +			if (!first_dn)
>> +				first_dn = dn;
>> +			else {
>> +				dn->parent = last_dn;
>> +				if (last_dn)
>> +					last_dn->child = dn;
>> +			}
>> +
>> +			last_dn = dn;
>> +			break;
>> +
>> +		case NEXT_PROPERTY:
>> +			property = parse_cc_property(workarea);
>> +			if (!property)
>> +				goto cc_error;
>> +
>> +			if (!last_dn->properties)
>> +				last_dn->properties = property;
>> +			else
>> +				last_property->next = property;
>> +
>> +			last_property = property;
>> +			break;
>> +
>> +		case PREV_PARENT:
>> +			last_dn = last_dn->parent;
>> +			break;
>> +
>> +		case CALL_AGAIN:
>> +			break;
>> +
>> +		case MORE_MEMORY:
>> +		case ERR_CFG_USE:
>> +		default:
>> +			printk(KERN_ERR "Unexpected Error (%d) "
>> +			       "returned from configure-connector\n", rc);
>> +			goto cc_error;
>> +		}
>> +
>> +		rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
>> +	}
>> +
>> +	spin_unlock(&workarea_lock);
>> +	return first_dn;
>> +
>> +cc_error:
>> +	spin_unlock(&workarea_lock);
>> +
>> +	if (first_dn)
>> +		free_cc_nodes(first_dn);
>> +
>> +	return NULL;
>> +}
>> +
>> +static struct device_node *derive_parent(const char *path)
>> +{
>> +	struct device_node *parent;
>> +	char parent_path[128];
>> +	int parent_path_len;
>> +
>> +	parent_path_len = strrchr(path, '/') - path + 1;
>> +	strlcpy(parent_path, path, parent_path_len);
>> +
>> +	parent = of_find_node_by_path(parent_path);
>> +
>> +	return parent;
>> +}
> 
> This ...
> 
>> +static int add_one_node(struct device_node *dn)
>> +{
>> +	struct proc_dir_entry *ent;
>> +	int rc;
>> +
>> +	of_node_set_flag(dn, OF_DYNAMIC);
>> +	kref_init(&dn->kref);
>> +	dn->parent = derive_parent(dn->full_name);
>> +
>> +	rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
>> +					  PSERIES_RECONFIG_ADD, dn);
>> +	if (rc == NOTIFY_BAD) {
>> +		printk(KERN_ERR "Failed to add device node %s\n",
>> +		       dn->full_name);
>> +		return -ENOMEM; /* For now, safe to assume kmalloc failure */
>> +	}
>> +
>> +	of_attach_node(dn);
>> +
>> +#ifdef CONFIG_PROC_DEVICETREE
>> +	ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
>> +	if (ent)
>> +		proc_device_tree_add_node(dn, ent);
>> +#endif
>> +
>> +	of_node_put(dn->parent);
>> +	return 0;
>> +}
> 
>  ... and this ...
> 
>> +int add_device_tree_nodes(struct device_node *dn)
>> +{
>> +	struct device_node *child = dn->child;
>> +	struct device_node *sibling = dn->sibling;
>> +	int rc;
>> +
>> +	dn->child = NULL;
>> +	dn->sibling = NULL;
>> +	dn->parent = NULL;
>> +
>> +	rc = add_one_node(dn);
>> +	if (rc)
>> +		return rc;
>> +
>> +	if (child) {
>> +		rc = add_device_tree_nodes(child);
>> +		if (rc)
>> +			return rc;
>> +	}
>> +
>> +	if (sibling)
>> +		rc = add_device_tree_nodes(sibling);
>> +
>> +	return rc;
>> +}
> 
>  ... and this ...
> 
>> +static int remove_one_node(struct device_node *dn)
>> +{
>> +	struct device_node *parent = dn->parent;
>> +	struct property *prop = dn->properties;
>> +
>> +#ifdef CONFIG_PROC_DEVICETREE
>> +	while (prop) {
>> +		remove_proc_entry(prop->name, dn->pde);
>> +		prop = prop->next;
>> +	}
>> +
>> +	if (dn->pde)
>> +		remove_proc_entry(dn->pde->name, parent->pde);
>> +#endif
>> +
>> +	blocking_notifier_call_chain(&pSeries_reconfig_chain,
>> +			    PSERIES_RECONFIG_REMOVE, dn);
>> +	of_detach_node(dn);
>> +	of_node_put(dn); /* Must decrement the refcount */
>> +
>> +	return 0;
>> +}
> 
>  ... and this ...
> 
>> +static int _remove_device_tree_nodes(struct device_node *dn)
>> +{
>> +	int rc;
>> +
>> +	if (dn->child) {
>> +		rc = _remove_device_tree_nodes(dn->child);
>> +		if (rc)
>> +			return rc;
>> +	}
>> +
>> +	if (dn->sibling) {
>> +		rc = _remove_device_tree_nodes(dn->sibling);
>> +		if (rc)
>> +			return rc;
>> +	}
>> +
>> +	rc = remove_one_node(dn);
>> +	return rc;
>> +}
> 
>  ... repeat myself ...
> 
>> +int remove_device_tree_nodes(struct device_node *dn)
>> +{
>> +	int rc;
>> +
>> +	if (dn->child) {
>> +		rc = _remove_device_tree_nodes(dn->child);
>> +		if (rc)
>> +			return rc;
>> +	}
>> +
>> +	rc = remove_one_node(dn);
>> +	return rc;
>> +}
> 
>  ... should probably all go to something like drivers/of/dynamic.c or at
> least for now arch/powerpc/kernel/of_dynamic.c along with everything
> related to dynamically adding and removing nodes. I see that potentially
> useful for more than just DLPAR (though DLPAR is the only user right
> now) and should also all be prefixed with of_*

I agree, there should be at least a powerpc generic implementation of these
routines.  The reason I put them here is that I am doing some oddities with
the next, child, and sibling pointers since they point to items not yet in
the device tree.

I saw that Grant Likely is doing updates to all of the of_* stuff right now,
would it be ok to have these routines here, renamed as dlpar_*, and look
to merge them in with Grant's updates when he finishes?
  
> 
>> +#define DR_ENTITY_SENSE		9003
>> +#define DR_ENTITY_PRESENT	1
>> +#define DR_ENTITY_UNUSABLE	2
>> +#define ALLOCATION_STATE	9003
>> +#define ALLOC_UNUSABLE		0
>> +#define ALLOC_USABLE		1
>> +#define ISOLATION_STATE		9001
>> +#define ISOLATE			0
>> +#define UNISOLATE		1
>> +
>> +int acquire_drc(u32 drc_index)
>> +{
>> +	int dr_status, rc;
>> +
>> +	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
>> +		       DR_ENTITY_SENSE, drc_index);
>> +	if (rc || dr_status != DR_ENTITY_UNUSABLE)
>> +		return -1;
>> +
>> +	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
>> +	if (rc) {
>> +		rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
>> +		return rc;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +int release_drc(u32 drc_index)
>> +{
>> +	int dr_status, rc;
>> +
>> +	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
>> +		       DR_ENTITY_SENSE, drc_index);
>> +	if (rc || dr_status != DR_ENTITY_PRESENT)
>> +		return -1;
>> +
>> +	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
>> +	if (rc)
>> +		return rc;
>> +
>> +	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
>> +	if (rc) {
>> +		rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
>> +		return rc;
>> +	}
>> +
>> +	return 0;
>> +}
> 
> Both above should have a dlpar_* prefix

will do.

> 
>> +static int pseries_dlpar_init(void)
>> +{
>> +	if (!machine_is(pseries))
>> +		return 0;
>> +
>> +	return 0;
>> +}
>> +device_initcall(pseries_dlpar_init);
> 
> What the point ? :-)

Yeah, its a bit odd looking but later patches actually add code to the init routine
to set up memory probe/release and cpu probe/release handlers.

I'll look to add ifdef's around the initcall for cases where no work is to be done.

-Nathan Fontenot

> 
> Cheers
> Ben.
>
Grant Likely Nov. 2, 2009, 4:40 p.m. UTC | #4
On Mon, Nov 2, 2009 at 9:27 AM, Nathan Fontenot <nfont@austin.ibm.com> wrote:
> I saw that Grant Likely is doing updates to all of the of_* stuff right now,
> would it be ok to have these routines here, renamed as dlpar_*, and look
> to merge them in with Grant's updates when he finishes?

No because then we're stuck with renaming the API at a later date.
Name it what it is, and put it where it belongs.  I'll deal with any
merge breakage as it occurs.

g.
Nathan Fontenot Nov. 2, 2009, 4:47 p.m. UTC | #5
Grant Likely wrote:
> On Mon, Nov 2, 2009 at 9:27 AM, Nathan Fontenot <nfont@austin.ibm.com> wrote:
>> I saw that Grant Likely is doing updates to all of the of_* stuff right now,
>> would it be ok to have these routines here, renamed as dlpar_*, and look
>> to merge them in with Grant's updates when he finishes?
> 
> No because then we're stuck with renaming the API at a later date.
> Name it what it is, and put it where it belongs.  I'll deal with any
> merge breakage as it occurs.
> 

ok.  Would this be better off in powerpc code, or should I go ahead and put it
in something like drivers/of/dynamic.c?

-Nathan Fontenot
Grant Likely Nov. 2, 2009, 4:56 p.m. UTC | #6
On Mon, Nov 2, 2009 at 9:47 AM, Nathan Fontenot <nfont@austin.ibm.com> wrote:
> Grant Likely wrote:
>>
>> On Mon, Nov 2, 2009 at 9:27 AM, Nathan Fontenot <nfont@austin.ibm.com>
>> wrote:
>>>
>>> I saw that Grant Likely is doing updates to all of the of_* stuff right
>>> now,
>>> would it be ok to have these routines here, renamed as dlpar_*, and look
>>> to merge them in with Grant's updates when he finishes?
>>
>> No because then we're stuck with renaming the API at a later date.
>> Name it what it is, and put it where it belongs.  I'll deal with any
>> merge breakage as it occurs.
>>
>
> ok.  Would this be better off in powerpc code, or should I go ahead and put
> it
> in something like drivers/of/dynamic.c?

drivers/of/dynamic.c sounds fine to me.  I can always move them if it
find a better place.  Send the patch to me and cc: the
devicetree-discuss@lists.ozlabs.org mailing list.

g.
diff mbox

Patch

Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c	2009-10-28 15:21:38.000000000 -0500
@@ -0,0 +1,414 @@ 
+/*
+ * dlpar.c - support for dynamic reconfiguration (including PCI
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/pSeries_reconfig.h>
+
+#define CFG_CONN_WORK_SIZE	4096
+static char workarea[CFG_CONN_WORK_SIZE];
+static DEFINE_SPINLOCK(workarea_lock);
+
+struct cc_workarea {
+	u32	drc_index;
+	u32	zero;
+	u32	name_offset;
+	u32	prop_length;
+	u32	prop_offset;
+};
+
+static struct property *parse_cc_property(char *workarea)
+{
+	struct property *prop;
+	struct cc_workarea *ccwa;
+	char *name;
+	char *value;
+
+	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	if (!prop)
+		return NULL;
+
+	ccwa = (struct cc_workarea *)workarea;
+	name = workarea + ccwa->name_offset;
+	prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+	if (!prop->name) {
+		kfree(prop);
+		return NULL;
+	}
+
+	strcpy(prop->name, name);
+
+	prop->length = ccwa->prop_length;
+	value = workarea + ccwa->prop_offset;
+	prop->value = kzalloc(prop->length, GFP_KERNEL);
+	if (!prop->value) {
+		kfree(prop->name);
+		kfree(prop);
+		return NULL;
+	}
+
+	memcpy(prop->value, value, prop->length);
+	return prop;
+}
+
+static void free_property(struct property *prop)
+{
+	kfree(prop->name);
+	kfree(prop->value);
+	kfree(prop);
+}
+
+static struct device_node *parse_cc_node(char *work_area)
+{
+	struct device_node *dn;
+	struct cc_workarea *ccwa;
+	char *name;
+
+	dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+	if (!dn)
+		return NULL;
+
+	ccwa = (struct cc_workarea *)work_area;
+	name = work_area + ccwa->name_offset;
+	dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+	if (!dn->full_name) {
+		kfree(dn);
+		return NULL;
+	}
+
+	strcpy(dn->full_name, name);
+	return dn;
+}
+
+static void free_one_cc_node(struct device_node *dn)
+{
+	struct property *prop;
+
+	while (dn->properties) {
+		prop = dn->properties;
+		dn->properties = prop->next;
+		free_property(prop);
+	}
+
+	kfree(dn->full_name);
+	kfree(dn);
+}
+
+static void free_cc_nodes(struct device_node *dn)
+{
+	if (dn->child)
+		free_cc_nodes(dn->child);
+
+	if (dn->sibling)
+		free_cc_nodes(dn->sibling);
+
+	free_one_cc_node(dn);
+}
+
+#define NEXT_SIBLING    1
+#define NEXT_CHILD      2
+#define NEXT_PROPERTY   3
+#define PREV_PARENT     4
+#define MORE_MEMORY     5
+#define CALL_AGAIN	-2
+#define ERR_CFG_USE     -9003
+
+struct device_node *configure_connector(u32 drc_index)
+{
+	struct device_node *dn;
+	struct device_node *first_dn = NULL;
+	struct device_node *last_dn = NULL;
+	struct property *property;
+	struct property *last_property = NULL;
+	struct cc_workarea *ccwa;
+	int cc_token;
+	int rc;
+
+	cc_token = rtas_token("ibm,configure-connector");
+	if (cc_token == RTAS_UNKNOWN_SERVICE)
+		return NULL;
+
+	spin_lock(&workarea_lock);
+
+	ccwa = (struct cc_workarea *)&workarea[0];
+	ccwa->drc_index = drc_index;
+	ccwa->zero = 0;
+
+	rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+	while (rc) {
+		switch (rc) {
+		case NEXT_SIBLING:
+			dn = parse_cc_node(workarea);
+			if (!dn)
+				goto cc_error;
+
+			dn->parent = last_dn->parent;
+			last_dn->sibling = dn;
+			last_dn = dn;
+			break;
+
+		case NEXT_CHILD:
+			dn = parse_cc_node(workarea);
+			if (!dn)
+				goto cc_error;
+
+			if (!first_dn)
+				first_dn = dn;
+			else {
+				dn->parent = last_dn;
+				if (last_dn)
+					last_dn->child = dn;
+			}
+
+			last_dn = dn;
+			break;
+
+		case NEXT_PROPERTY:
+			property = parse_cc_property(workarea);
+			if (!property)
+				goto cc_error;
+
+			if (!last_dn->properties)
+				last_dn->properties = property;
+			else
+				last_property->next = property;
+
+			last_property = property;
+			break;
+
+		case PREV_PARENT:
+			last_dn = last_dn->parent;
+			break;
+
+		case CALL_AGAIN:
+			break;
+
+		case MORE_MEMORY:
+		case ERR_CFG_USE:
+		default:
+			printk(KERN_ERR "Unexpected Error (%d) "
+			       "returned from configure-connector\n", rc);
+			goto cc_error;
+		}
+
+		rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+	}
+
+	spin_unlock(&workarea_lock);
+	return first_dn;
+
+cc_error:
+	spin_unlock(&workarea_lock);
+
+	if (first_dn)
+		free_cc_nodes(first_dn);
+
+	return NULL;
+}
+
+static struct device_node *derive_parent(const char *path)
+{
+	struct device_node *parent;
+	char parent_path[128];
+	int parent_path_len;
+
+	parent_path_len = strrchr(path, '/') - path + 1;
+	strlcpy(parent_path, path, parent_path_len);
+
+	parent = of_find_node_by_path(parent_path);
+
+	return parent;
+}
+
+static int add_one_node(struct device_node *dn)
+{
+	struct proc_dir_entry *ent;
+	int rc;
+
+	of_node_set_flag(dn, OF_DYNAMIC);
+	kref_init(&dn->kref);
+	dn->parent = derive_parent(dn->full_name);
+
+	rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+					  PSERIES_RECONFIG_ADD, dn);
+	if (rc == NOTIFY_BAD) {
+		printk(KERN_ERR "Failed to add device node %s\n",
+		       dn->full_name);
+		return -ENOMEM; /* For now, safe to assume kmalloc failure */
+	}
+
+	of_attach_node(dn);
+
+#ifdef CONFIG_PROC_DEVICETREE
+	ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
+	if (ent)
+		proc_device_tree_add_node(dn, ent);
+#endif
+
+	of_node_put(dn->parent);
+	return 0;
+}
+
+int add_device_tree_nodes(struct device_node *dn)
+{
+	struct device_node *child = dn->child;
+	struct device_node *sibling = dn->sibling;
+	int rc;
+
+	dn->child = NULL;
+	dn->sibling = NULL;
+	dn->parent = NULL;
+
+	rc = add_one_node(dn);
+	if (rc)
+		return rc;
+
+	if (child) {
+		rc = add_device_tree_nodes(child);
+		if (rc)
+			return rc;
+	}
+
+	if (sibling)
+		rc = add_device_tree_nodes(sibling);
+
+	return rc;
+}
+
+static int remove_one_node(struct device_node *dn)
+{
+	struct device_node *parent = dn->parent;
+	struct property *prop = dn->properties;
+
+#ifdef CONFIG_PROC_DEVICETREE
+	while (prop) {
+		remove_proc_entry(prop->name, dn->pde);
+		prop = prop->next;
+	}
+
+	if (dn->pde)
+		remove_proc_entry(dn->pde->name, parent->pde);
+#endif
+
+	blocking_notifier_call_chain(&pSeries_reconfig_chain,
+			    PSERIES_RECONFIG_REMOVE, dn);
+	of_detach_node(dn);
+	of_node_put(dn); /* Must decrement the refcount */
+
+	return 0;
+}
+
+static int _remove_device_tree_nodes(struct device_node *dn)
+{
+	int rc;
+
+	if (dn->child) {
+		rc = _remove_device_tree_nodes(dn->child);
+		if (rc)
+			return rc;
+	}
+
+	if (dn->sibling) {
+		rc = _remove_device_tree_nodes(dn->sibling);
+		if (rc)
+			return rc;
+	}
+
+	rc = remove_one_node(dn);
+	return rc;
+}
+
+int remove_device_tree_nodes(struct device_node *dn)
+{
+	int rc;
+
+	if (dn->child) {
+		rc = _remove_device_tree_nodes(dn->child);
+		if (rc)
+			return rc;
+	}
+
+	rc = remove_one_node(dn);
+	return rc;
+}
+
+#define DR_ENTITY_SENSE		9003
+#define DR_ENTITY_PRESENT	1
+#define DR_ENTITY_UNUSABLE	2
+#define ALLOCATION_STATE	9003
+#define ALLOC_UNUSABLE		0
+#define ALLOC_USABLE		1
+#define ISOLATION_STATE		9001
+#define ISOLATE			0
+#define UNISOLATE		1
+
+int acquire_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+		       DR_ENTITY_SENSE, drc_index);
+	if (rc || dr_status != DR_ENTITY_UNUSABLE)
+		return -1;
+
+	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+	if (rc)
+		return rc;
+
+	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+	if (rc) {
+		rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+		return rc;
+	}
+
+	return 0;
+}
+
+int release_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+		       DR_ENTITY_SENSE, drc_index);
+	if (rc || dr_status != DR_ENTITY_PRESENT)
+		return -1;
+
+	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+	if (rc)
+		return rc;
+
+	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+	if (rc) {
+		rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+		return rc;
+	}
+
+	return 0;
+}
+
+static int pseries_dlpar_init(void)
+{
+	if (!machine_is(pseries))
+		return 0;
+
+	return 0;
+}
+device_initcall(pseries_dlpar_init);
Index: powerpc/arch/powerpc/platforms/pseries/Makefile
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/Makefile	2009-10-28 15:20:38.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/Makefile	2009-10-28 15:21:38.000000000 -0500
@@ -8,7 +8,7 @@ 
 
 obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   setup.o iommu.o ras.o rtasd.o \
-			   firmware.o power.o
+			   firmware.o power.o dlpar.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
Index: powerpc/arch/powerpc/include/asm/pSeries_reconfig.h
===================================================================
--- powerpc.orig/arch/powerpc/include/asm/pSeries_reconfig.h	2009-10-28 15:20:38.000000000 -0500
+++ powerpc/arch/powerpc/include/asm/pSeries_reconfig.h	2009-10-28 15:21:38.000000000 -0500
@@ -17,6 +17,7 @@ 
 #ifdef CONFIG_PPC_PSERIES
 extern int pSeries_reconfig_notifier_register(struct notifier_block *);
 extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
+extern struct blocking_notifier_head pSeries_reconfig_chain;
 #else /* !CONFIG_PPC_PSERIES */
 static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
Index: powerpc/arch/powerpc/platforms/pseries/reconfig.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/reconfig.c	2009-10-28 15:20:38.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/reconfig.c	2009-10-28 15:21:38.000000000 -0500
@@ -96,7 +96,7 @@ 
 	return parent;
 }
 
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
+BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
 
 int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {