diff mbox

[4/7] Allow sysfs memory directories to be split

Message ID 4C3B3895.3040209@austin.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Nathan Fontenot July 12, 2010, 3:45 p.m. UTC
This patch introduces the new 'split' file in each memory sysfs
directory and the associated routines needed to handle splitting
a directory.

Signed-off-by; Nathan Fontenot <nfont@austin.ibm.com>
---
 drivers/base/memory.c |   99 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 98 insertions(+), 1 deletion(-)

Comments

KAMEZAWA Hiroyuki July 13, 2010, 6:28 a.m. UTC | #1
On Mon, 12 Jul 2010 10:45:25 -0500
Nathan Fontenot <nfont@austin.ibm.com> wrote:

> This patch introduces the new 'split' file in each memory sysfs
> directory and the associated routines needed to handle splitting
> a directory.
> 
> Signed-off-by; Nathan Fontenot <nfont@austin.ibm.com>
> ---

pleae check diff option...


>  drivers/base/memory.c |   99 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 98 insertions(+), 1 deletion(-)
> 
> Index: linux-2.6/drivers/base/memory.c
> ===================================================================
> --- linux-2.6.orig/drivers/base/memory.c	2010-07-09 14:23:20.000000000 -0500
> +++ linux-2.6/drivers/base/memory.c	2010-07-09 14:38:09.000000000 -0500
> @@ -32,6 +32,9 @@
>  
>  static int sections_per_block;
>  
> +static int register_memory(struct memory_block *, struct mem_section *,
> +			   int, enum mem_add_context);
> +
>  static inline int base_memory_block_id(int section_nr)
>  {
>  	return (section_nr / sections_per_block) * sections_per_block;
> @@ -309,11 +312,100 @@
>  	return sprintf(buf, "%d\n", mem->phys_device);
>  }
>  
> +static void update_memory_block_phys_indexes(struct memory_block *mem)
> +{
> +	struct list_head *pos;
> +	struct memory_block_section *mbs;
> +	unsigned long min_index = 0xffffffff;
> +	unsigned long max_index = 0;
> +
> +	list_for_each(pos, &mem->sections) {
> +		mbs = list_entry(pos, struct memory_block_section, next);
> +
> +		if (mbs->phys_index < min_index)
> +			min_index = mbs->phys_index;
> +
> +		if (mbs->phys_index > max_index)
> +			max_index = mbs->phys_index;
> +	}
> +
> +	mem->start_phys_index = min_index;
> +	mem->end_phys_index = max_index;
> +}
> +
> +static ssize_t
> +store_mem_split_block(struct sys_device *dev, struct sysdev_attribute *attr,
> +		      const char *buf, size_t count)
> +{
> +	struct memory_block *mem, *new_mem_blk;
> +	struct memory_block_section *mbs;
> +	struct list_head *pos, *tmp;
> +	struct mem_section *section;
> +	int min_scn_nr = 0;
> +	int max_scn_nr = 0;
> +	int total_scns = 0;
> +	int new_blk_min, new_blk_total;
> +	int ret = -EINVAL;
> +
> +	mem = container_of(dev, struct memory_block, sysdev);
> +
> +	if (list_is_singular(&mem->sections))
> +		return -EINVAL;

What this means ?


> +
> +	mutex_lock(&mem->state_mutex);
> +
> +	list_for_each(pos, &mem->sections) {
> +		mbs = list_entry(pos, struct memory_block_section, next);
> +
> +		total_scns++;
> +
> +		if (min_scn_nr > mbs->phys_index)
> +			min_scn_nr = mbs->phys_index;
> +
> +		if (max_scn_nr < mbs->phys_index)
> +			max_scn_nr = mbs->phys_index;
> +	}
> +
> +	new_mem_blk = kzalloc(sizeof(*new_mem_blk), GFP_KERNEL);
> +	if (!new_mem_blk)
> +		return -ENOMEM;
> +
> +	mutex_init(&new_mem_blk->state_mutex);
> +	INIT_LIST_HEAD(&new_mem_blk->sections);
> +	new_mem_blk->state = mem->state;
> +
> +	mutex_lock(&new_mem_blk->state_mutex);
> +
> +	new_blk_total = total_scns / 2;
> +	new_blk_min = max_scn_nr - new_blk_total + 1;
> +
> +	section = __nr_to_section(new_blk_min);
> +	ret = register_memory(new_mem_blk, section, 0, HOTPLUG);
> +
'nid' is always 0 ?

And for what purpose this interface is ? Does this split memory block into 2 pieces
of the same size ?? sounds __very__ strange interface to me.

If this is necessary, I hope move the whole things to configfs rather than
something tricky.

Bye.
-Kame
Nathan Fontenot July 13, 2010, 3:51 p.m. UTC | #2
On 07/13/2010 01:28 AM, KAMEZAWA Hiroyuki wrote:
> On Mon, 12 Jul 2010 10:45:25 -0500
> Nathan Fontenot <nfont@austin.ibm.com> wrote:
> 
>> This patch introduces the new 'split' file in each memory sysfs
>> directory and the associated routines needed to handle splitting
>> a directory.
>>
>> Signed-off-by; Nathan Fontenot <nfont@austin.ibm.com>
>> ---
> 
> pleae check diff option...
> 
> 
>>  drivers/base/memory.c |   99 +++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 98 insertions(+), 1 deletion(-)
>>
>> Index: linux-2.6/drivers/base/memory.c
>> ===================================================================
>> --- linux-2.6.orig/drivers/base/memory.c	2010-07-09 14:23:20.000000000 -0500
>> +++ linux-2.6/drivers/base/memory.c	2010-07-09 14:38:09.000000000 -0500
>> @@ -32,6 +32,9 @@
>>  
>>  static int sections_per_block;
>>  
>> +static int register_memory(struct memory_block *, struct mem_section *,
>> +			   int, enum mem_add_context);
>> +
>>  static inline int base_memory_block_id(int section_nr)
>>  {
>>  	return (section_nr / sections_per_block) * sections_per_block;
>> @@ -309,11 +312,100 @@
>>  	return sprintf(buf, "%d\n", mem->phys_device);
>>  }
>>  
>> +static void update_memory_block_phys_indexes(struct memory_block *mem)
>> +{
>> +	struct list_head *pos;
>> +	struct memory_block_section *mbs;
>> +	unsigned long min_index = 0xffffffff;
>> +	unsigned long max_index = 0;
>> +
>> +	list_for_each(pos, &mem->sections) {
>> +		mbs = list_entry(pos, struct memory_block_section, next);
>> +
>> +		if (mbs->phys_index < min_index)
>> +			min_index = mbs->phys_index;
>> +
>> +		if (mbs->phys_index > max_index)
>> +			max_index = mbs->phys_index;
>> +	}
>> +
>> +	mem->start_phys_index = min_index;
>> +	mem->end_phys_index = max_index;
>> +}
>> +
>> +static ssize_t
>> +store_mem_split_block(struct sys_device *dev, struct sysdev_attribute *attr,
>> +		      const char *buf, size_t count)
>> +{
>> +	struct memory_block *mem, *new_mem_blk;
>> +	struct memory_block_section *mbs;
>> +	struct list_head *pos, *tmp;
>> +	struct mem_section *section;
>> +	int min_scn_nr = 0;
>> +	int max_scn_nr = 0;
>> +	int total_scns = 0;
>> +	int new_blk_min, new_blk_total;
>> +	int ret = -EINVAL;
>> +
>> +	mem = container_of(dev, struct memory_block, sysdev);
>> +
>> +	if (list_is_singular(&mem->sections))
>> +		return -EINVAL;
> 
> What this means ?

list_is_singular() will return true if there is only one item
on the list.  In this case we cannot split a memory_block with
only one memory_block_section.

> 
> 
>> +
>> +	mutex_lock(&mem->state_mutex);
>> +
>> +	list_for_each(pos, &mem->sections) {
>> +		mbs = list_entry(pos, struct memory_block_section, next);
>> +
>> +		total_scns++;
>> +
>> +		if (min_scn_nr > mbs->phys_index)
>> +			min_scn_nr = mbs->phys_index;
>> +
>> +		if (max_scn_nr < mbs->phys_index)
>> +			max_scn_nr = mbs->phys_index;
>> +	}
>> +
>> +	new_mem_blk = kzalloc(sizeof(*new_mem_blk), GFP_KERNEL);
>> +	if (!new_mem_blk)
>> +		return -ENOMEM;
>> +
>> +	mutex_init(&new_mem_blk->state_mutex);
>> +	INIT_LIST_HEAD(&new_mem_blk->sections);
>> +	new_mem_blk->state = mem->state;
>> +
>> +	mutex_lock(&new_mem_blk->state_mutex);
>> +
>> +	new_blk_total = total_scns / 2;
>> +	new_blk_min = max_scn_nr - new_blk_total + 1;
>> +
>> +	section = __nr_to_section(new_blk_min);
>> +	ret = register_memory(new_mem_blk, section, 0, HOTPLUG);
>> +
> 'nid' is always 0 ?

Ahh.. good catch.  it may not be.  I'll look into finding the correct nid.

> 
> And for what purpose this interface is ? Does this split memory block into 2 pieces
> of the same size ?? sounds __very__ strange interface to me.

Yes, this splits the memory_block into two blocks of the same size.  This was
suggested as something we may want to do.  From ppc perspective I am not sure we
would use this.

The split functionality is not required.  The main goal of the patch set is to
reduce the number of memory sysfs directories created.  From a ppc perspective
the split functionality is not really needed.

> 
> If this is necessary, I hope move the whole things to configfs rather than
> something tricky.
> 
> Bye.
> -Kame
>
KAMEZAWA Hiroyuki July 14, 2010, 12:35 a.m. UTC | #3
On Tue, 13 Jul 2010 10:51:58 -0500
Nathan Fontenot <nfont@austin.ibm.com> wrote:

> > 
> > And for what purpose this interface is ? Does this split memory block into 2 pieces
> > of the same size ?? sounds __very__ strange interface to me.
> 
> Yes, this splits the memory_block into two blocks of the same size.  This was
> suggested as something we may want to do.  From ppc perspective I am not sure we
> would use this.
> 
> The split functionality is not required.  The main goal of the patch set is to
> reduce the number of memory sysfs directories created.  From a ppc perspective
> the split functionality is not really needed.
> 

Okay, this is an offer from me.

  1. I think you can add an boot option as "don't create memory sysfs".
     please do.

  2. I'd like to write a configfs module for handling memory hotplug even when
     sysfs directroy is not created.
     Because configfs support rmdir/mkdir, the user (ppc's daemon?) has to do
     
     When offlining section X.
     # insmod configfs_memory.ko
     # mount -t configfs none /configfs
     # mkdir /configfs/memoryX
     # echo offline > /configfs/memoryX/state
     # rmdir /configfs/memoryX

  And making this operation as the default bahavior for all arch's memory hotplug may
  be better...

Dave, how do you think ? Because ppc guys uses "probe" interface already,
this can be handled... no ?

One problem is that I don't have enough knowledge about configfs..it seems complex.

Thanks,
-Kame
Nathan Fontenot July 14, 2010, 3:18 a.m. UTC | #4
On 07/13/2010 07:35 PM, KAMEZAWA Hiroyuki wrote:
> On Tue, 13 Jul 2010 10:51:58 -0500
> Nathan Fontenot <nfont@austin.ibm.com> wrote:
> 
>>>
>>> And for what purpose this interface is ? Does this split memory block into 2 pieces
>>> of the same size ?? sounds __very__ strange interface to me.
>>
>> Yes, this splits the memory_block into two blocks of the same size.  This was
>> suggested as something we may want to do.  From ppc perspective I am not sure we
>> would use this.
>>
>> The split functionality is not required.  The main goal of the patch set is to
>> reduce the number of memory sysfs directories created.  From a ppc perspective
>> the split functionality is not really needed.
>>
> 
> Okay, this is an offer from me.
> 
>   1. I think you can add an boot option as "don't create memory sysfs".
>      please do.

I posted a patch to do that a week or so ago, it didn't go over very well.

> 
>   2. I'd like to write a configfs module for handling memory hotplug even when
>      sysfs directroy is not created.
>      Because configfs support rmdir/mkdir, the user (ppc's daemon?) has to do
>      
>      When offlining section X.
>      # insmod configfs_memory.ko
>      # mount -t configfs none /configfs
>      # mkdir /configfs/memoryX
>      # echo offline > /configfs/memoryX/state
>      # rmdir /configfs/memoryX
> 
>   And making this operation as the default bahavior for all arch's memory hotplug may
>   be better...
> 
> Dave, how do you think ? Because ppc guys uses "probe" interface already,
> this can be handled... no ?

ppc would still require the existance of the 'probe' interface.

Are you objecting to the 'split' functionality?  If so I do not see any reason from ppc
perspective that it is needed.  This was something Dave suggested, unless I am missing
something.

Since ppc needs the 'probe' interface in sysfs, and for ppc having mutliple 
memory_block_sections reside under a single memory_block makes memory hotplug
simpler.  On ppc we do emory hotplug operations on an LMB size basis.  With my
patches this now lets us set each memory_block to span an LMB's worth of
memory.  Now we could do emory hotplug in a single operation instead of multiple
operations to offline/online all of the memory sections in an LMB.

Of course the easy solution would be to increase SECTION_SIZE_BITS, but we need
support hardware that can have LMB's from 16 MB to 256 MB in size so the
SECTION_SIZE_BITS value has to remain small. 

> 
> One problem is that I don't have enough knowledge about configfs..it seems complex.

Me neither, thoug I will take a look at it.

-Nathan
KAMEZAWA Hiroyuki July 14, 2010, 3:25 a.m. UTC | #5
On Tue, 13 Jul 2010 22:18:03 -0500
Nathan Fontenot <nfont@austin.ibm.com> wrote:

> On 07/13/2010 07:35 PM, KAMEZAWA Hiroyuki wrote:
> > On Tue, 13 Jul 2010 10:51:58 -0500
> > Nathan Fontenot <nfont@austin.ibm.com> wrote:
> > 
> >>>
> >>> And for what purpose this interface is ? Does this split memory block into 2 pieces
> >>> of the same size ?? sounds __very__ strange interface to me.
> >>
> >> Yes, this splits the memory_block into two blocks of the same size.  This was
> >> suggested as something we may want to do.  From ppc perspective I am not sure we
> >> would use this.
> >>
> >> The split functionality is not required.  The main goal of the patch set is to
> >> reduce the number of memory sysfs directories created.  From a ppc perspective
> >> the split functionality is not really needed.
> >>
> > 
> > Okay, this is an offer from me.
> > 
> >   1. I think you can add an boot option as "don't create memory sysfs".
> >      please do.
> 
> I posted a patch to do that a week or so ago, it didn't go over very well.
> 
> > 
> >   2. I'd like to write a configfs module for handling memory hotplug even when
> >      sysfs directroy is not created.
> >      Because configfs support rmdir/mkdir, the user (ppc's daemon?) has to do
> >      
> >      When offlining section X.
> >      # insmod configfs_memory.ko
> >      # mount -t configfs none /configfs
> >      # mkdir /configfs/memoryX
> >      # echo offline > /configfs/memoryX/state
> >      # rmdir /configfs/memoryX
> > 
> >   And making this operation as the default bahavior for all arch's memory hotplug may
> >   be better...
> > 
> > Dave, how do you think ? Because ppc guys uses "probe" interface already,
> > this can be handled... no ?
> 
> ppc would still require the existance of the 'probe' interface.
> 
> Are you objecting to the 'split' functionality? 
yes.

> If so I do not see any reason from ppc
> perspective that it is needed.  This was something Dave suggested, unless I am missing
> something.
> 
> Since ppc needs the 'probe' interface in sysfs, and for ppc having mutliple 
> memory_block_sections reside under a single memory_block makes memory hotplug
> simpler.  On ppc we do emory hotplug operations on an LMB size basis.  With my
> patches this now lets us set each memory_block to span an LMB's worth of
> memory.  Now we could do emory hotplug in a single operation instead of multiple
> operations to offline/online all of the memory sections in an LMB.
> 

Why per-section memory offlining is provided is for allowing good success-rate of
memory offlining. Because memory-hotplug has to "migrate or free" all used page
under a section, possibility of memory unplug depends on usage of memory.
If a section contains unmovable page(kernel page), we can't offline sectin.

For example, comparing
  1. offlining 128MB of memory at once
  2. offlining 8 chunks of 16MB memory
"2" can get very good possibility and system-busy time can be much reduced.

IIUC, ppc's 1st requirement is "resizing" not "hot-removing some memory device",
"2" is much welcomed. So, some fine-grained interface to section_size is
appreciated. So, "multiple operations" is much better than single operation.

As I posted show/hide patch, I'm writing it in configfs. I think it meets IBM's
requirements.
_But_, it's IBM's issue not Fujitsu's. So, final decistion will depend on you guys.

Anyway, I don't like a too fancy interface as "split".

Thanks,
-Kame
Dave Hansen July 14, 2010, 3:26 a.m. UTC | #6
On Wed, 2010-07-14 at 09:35 +0900, KAMEZAWA Hiroyuki wrote:
>   2. I'd like to write a configfs module for handling memory hotplug even when
>      sysfs directroy is not created.
>      Because configfs support rmdir/mkdir, the user (ppc's daemon?) has to do
>      
>      When offlining section X.
>      # insmod configfs_memory.ko
>      # mount -t configfs none /configfs
>      # mkdir /configfs/memoryX
>      # echo offline > /configfs/memoryX/state
>      # rmdir /configfs/memoryX
> 
>   And making this operation as the default bahavior for all arch's memory hotplug may
>   be better...
> 
> Dave, how do you think ? Because ppc guys uses "probe" interface already,
> this can be handled... no ?

I think creating a interface to duplicate the existing sysfs one is a
bad idea.  I also think removing the existing sysfs one isn't feasible
since there are users, and it's truly part of the ABI.  So, I'm not
really a fan on the configfs interface. :(

I really do think the sysfs interface is fixable.  We should at least
give it a good shot before largely duplicating its functionality.

-- Dave
Nathan Fontenot July 14, 2010, 5:16 p.m. UTC | #7
On 07/13/2010 10:26 PM, Dave Hansen wrote:
> On Wed, 2010-07-14 at 09:35 +0900, KAMEZAWA Hiroyuki wrote:
>>   2. I'd like to write a configfs module for handling memory hotplug even when
>>      sysfs directroy is not created.
>>      Because configfs support rmdir/mkdir, the user (ppc's daemon?) has to do
>>      
>>      When offlining section X.
>>      # insmod configfs_memory.ko
>>      # mount -t configfs none /configfs
>>      # mkdir /configfs/memoryX
>>      # echo offline > /configfs/memoryX/state
>>      # rmdir /configfs/memoryX
>>
>>   And making this operation as the default bahavior for all arch's memory hotplug may
>>   be better...
>>
>> Dave, how do you think ? Because ppc guys uses "probe" interface already,
>> this can be handled... no ?
> 
> I think creating a interface to duplicate the existing sysfs one is a
> bad idea.  I also think removing the existing sysfs one isn't feasible
> since there are users, and it's truly part of the ABI.  So, I'm not
> really a fan on the configfs interface. :(
> 
> I really do think the sysfs interface is fixable.  We should at least
> give it a good shot before largely duplicating its functionality.

I agree with Dave, I don't think another memory hotplug interface is needed.

I am working to update the patchset to remove the split functionality and
fix other items commented on.  this new patch will just split the memory_block
structure so that a memory_block can span multiple memory sections.

Kame, I understand that offlining 16 MB is easier than 256 MB.  From the ppc
perspective though, we are still offlining 256 MB.  We do memory add/remove
on LMB size chunks, so we have to add/remove all of the memory sections contained
in an LMB.  If any one memory section covered by a LMB fails to add/remove, we
restore the memory sections to their orignal state an fail the add/remove operation.
NOTE: the code doing this is not in the kernel, but in the user-space drmgr
command (from powerpc-utils package).

-Nathan
diff mbox

Patch

Index: linux-2.6/drivers/base/memory.c
===================================================================
--- linux-2.6.orig/drivers/base/memory.c	2010-07-09 14:23:20.000000000 -0500
+++ linux-2.6/drivers/base/memory.c	2010-07-09 14:38:09.000000000 -0500
@@ -32,6 +32,9 @@ 
 
 static int sections_per_block;
 
+static int register_memory(struct memory_block *, struct mem_section *,
+			   int, enum mem_add_context);
+
 static inline int base_memory_block_id(int section_nr)
 {
 	return (section_nr / sections_per_block) * sections_per_block;
@@ -309,11 +312,100 @@ 
 	return sprintf(buf, "%d\n", mem->phys_device);
 }
 
+static void update_memory_block_phys_indexes(struct memory_block *mem)
+{
+	struct list_head *pos;
+	struct memory_block_section *mbs;
+	unsigned long min_index = 0xffffffff;
+	unsigned long max_index = 0;
+
+	list_for_each(pos, &mem->sections) {
+		mbs = list_entry(pos, struct memory_block_section, next);
+
+		if (mbs->phys_index < min_index)
+			min_index = mbs->phys_index;
+
+		if (mbs->phys_index > max_index)
+			max_index = mbs->phys_index;
+	}
+
+	mem->start_phys_index = min_index;
+	mem->end_phys_index = max_index;
+}
+
+static ssize_t
+store_mem_split_block(struct sys_device *dev, struct sysdev_attribute *attr,
+		      const char *buf, size_t count)
+{
+	struct memory_block *mem, *new_mem_blk;
+	struct memory_block_section *mbs;
+	struct list_head *pos, *tmp;
+	struct mem_section *section;
+	int min_scn_nr = 0;
+	int max_scn_nr = 0;
+	int total_scns = 0;
+	int new_blk_min, new_blk_total;
+	int ret = -EINVAL;
+
+	mem = container_of(dev, struct memory_block, sysdev);
+
+	if (list_is_singular(&mem->sections))
+		return -EINVAL;
+
+	mutex_lock(&mem->state_mutex);
+
+	list_for_each(pos, &mem->sections) {
+		mbs = list_entry(pos, struct memory_block_section, next);
+
+		total_scns++;
+
+		if (min_scn_nr > mbs->phys_index)
+			min_scn_nr = mbs->phys_index;
+
+		if (max_scn_nr < mbs->phys_index)
+			max_scn_nr = mbs->phys_index;
+	}
+
+	new_mem_blk = kzalloc(sizeof(*new_mem_blk), GFP_KERNEL);
+	if (!new_mem_blk)
+		return -ENOMEM;
+
+	mutex_init(&new_mem_blk->state_mutex);
+	INIT_LIST_HEAD(&new_mem_blk->sections);
+	new_mem_blk->state = mem->state;
+
+	mutex_lock(&new_mem_blk->state_mutex);
+
+	new_blk_total = total_scns / 2;
+	new_blk_min = max_scn_nr - new_blk_total + 1;
+
+	section = __nr_to_section(new_blk_min);
+	ret = register_memory(new_mem_blk, section, 0, HOTPLUG);
+
+	list_for_each_safe(pos, tmp, &mem->sections) {
+		unsigned long scn_nr;
+
+		mbs = list_entry(pos, struct memory_block_section, next);
+		scn_nr = mbs->phys_index;
+
+		if ((scn_nr >= new_blk_min) && (scn_nr <= max_scn_nr))
+			list_move(&mbs->next, &new_mem_blk->sections);
+	}
+
+	update_memory_block_phys_indexes(mem);
+	update_memory_block_phys_indexes(new_mem_blk);
+
+	mutex_unlock(&new_mem_blk->state_mutex);
+	mutex_unlock(&mem->state_mutex);
+	return count;
+}
+
 static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
 static SYSDEV_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL);
 static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
 static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
 static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
+static SYSDEV_ATTR(split, 0200, NULL, store_mem_split_block);
 
 #define mem_create_simple_file(mem, attr_name)	\
 	sysdev_create_file(&mem->sysdev, &attr_##attr_name)
@@ -343,6 +435,8 @@ 
 		ret = mem_create_simple_file(memory, phys_device);
 	if (!ret)
 		ret = mem_create_simple_file(memory, removable);
+	if (!ret)
+		ret = mem_create_simple_file(memory, split);
 	if (!ret) {
 		if (context == HOTPLUG)
 			ret = register_mem_sect_under_node(memory, nid);
@@ -361,6 +455,7 @@ 
 	mem_remove_simple_file(memory, state);
 	mem_remove_simple_file(memory, phys_device);
 	mem_remove_simple_file(memory, removable);
+	mem_remove_simple_file(memory, split);
 
 	/* drop the ref. we got in remove_memory_block() */
 	kobject_put(&memory->sysdev.kobj);
@@ -568,8 +663,10 @@ 
 		kobject_put(&mem->sysdev.kobj);
 	}
 
-	if (!ret)
+	if (!ret) {
 		ret = add_mem_block_section(mem, __section_nr(section), state);
+		update_memory_block_phys_indexes(mem);
+	}
 
 	return ret;
 }