
[v18,05/10] xbitmap: add more operations

Message ID 1511963726-34070-6-git-send-email-wei.w.wang@intel.com
State New
Series Virtio-balloon Enhancement

Commit Message

Wang, Wei W Nov. 29, 2017, 1:55 p.m. UTC
This patch adds support to find the next 1 or 0 bit in an xbitmap range and
to clear a range of bits.

More possible optimizations to add in the future:
1) xb_set_bit_range: set a range of bits.
2) when searching a bit, if the bit is not found in the slot, move on to
the next slot directly.
3) add tags to help searching.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Suggested-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/xbitmap.h      |   8 +-
 lib/xbitmap.c                | 180 +++++++++++++++++++++++++++++++++++++++++++
 tools/include/linux/bitmap.h |  34 ++++++++
 tools/include/linux/kernel.h |   2 +
 4 files changed, 223 insertions(+), 1 deletion(-)
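
For orientation, a rough sketch of how a caller might use the operations this patch adds
(a hypothetical consumer; the intended user is elsewhere in this Virtio-balloon series,
and record_pfn()/report_free_page() below are purely illustrative names):

	static DEFINE_XB(free_page_xb);

	static int record_pfn(unsigned long pfn)
	{
		/* Preloads the needed memory, then sets the bit for this pfn. */
		return xb_preload_and_set_bit(&free_page_xb, pfn, GFP_KERNEL);
	}

	static void report_and_forget(unsigned long start_pfn, unsigned long end_pfn)
	{
		unsigned long pfn = xb_find_next_set_bit(&free_page_xb, start_pfn, end_pfn);

		while (pfn <= end_pfn) {
			report_free_page(pfn);	/* illustrative callback */
			pfn = xb_find_next_set_bit(&free_page_xb, pfn + 1, end_pfn);
		}
		xb_clear_bit_range(&free_page_xb, start_pfn, end_pfn);
	}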

Comments

Tetsuo Handa Nov. 30, 2017, 10:34 a.m. UTC | #1
Wei Wang wrote:
>  /**
> + * xb_clear_bit - clear a range of bits in the xbitmap

Name mismatch.

> + * @start: the start of the bit range, inclusive
> + * @end: the end of the bit range, inclusive
> + *
> + * This function is used to clear a bit in the xbitmap. If all the bits of the
> + * bitmap are 0, the bitmap will be freed.
> + */
> +void xb_clear_bit_range(struct xb *xb, unsigned long start, unsigned long end)
> +{
> +	struct radix_tree_root *root = &xb->xbrt;
> +	struct radix_tree_node *node;
> +	void **slot;
> +	struct ida_bitmap *bitmap;
> +	unsigned int nbits;
> +
> +	for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
> +		unsigned long index = start / IDA_BITMAP_BITS;
> +		unsigned long bit = start % IDA_BITMAP_BITS;
> +
> +		bitmap = __radix_tree_lookup(root, index, &node, &slot);
> +		if (radix_tree_exception(bitmap)) {
> +			unsigned long ebit = bit + 2;
> +			unsigned long tmp = (unsigned long)bitmap;
> +
> +			nbits = min(end - start + 1, BITS_PER_LONG - ebit);

"nbits = min(end - start + 1," seems to expect that start == end is legal
for clearing only 1 bit. But this function is no-op if start == end.
Please clarify what "inclusive" intended.

> +
> +			if (ebit >= BITS_PER_LONG)
> +				continue;

(I don't understand how the radix tree works, but generally this patchset looks fuzzy
to me about boundary cases. Thus, I want to confirm that this is not an oversight.)
Why is making the "ebit >= BITS_PER_LONG" case (e.g. start == 62) a no-op correct?
Aren't there bits which should have been cleared in this case?

> +			bitmap_clear(&tmp, ebit, nbits);
> +			if (tmp == RADIX_TREE_EXCEPTIONAL_ENTRY)
> +				__radix_tree_delete(root, node, slot);
> +			else
> +				rcu_assign_pointer(*slot, (void *)tmp);
> +		} else if (bitmap) {
> +			nbits = min(end - start + 1, IDA_BITMAP_BITS - bit);
> +
> +			if (nbits != IDA_BITMAP_BITS)
> +				bitmap_clear(bitmap->bitmap, bit, nbits);
> +
> +			if (nbits == IDA_BITMAP_BITS ||
> +			    bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
> +				kfree(bitmap);
> +				__radix_tree_delete(root, node, slot);
> +			}
> +		}
> +	}
> +}



> +static inline __always_inline void bitmap_clear(unsigned long *map,
> +						unsigned int start,
> +						unsigned int nbits)
> +{
> +	if (__builtin_constant_p(nbits) && nbits == 1)
> +		__clear_bit(start, map);
> +	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
> +		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))

It looks strange to apply __builtin_constant_p test to variables after "& 7".

> +		memset((char *)map + start / 8, 0, nbits / 8);
> +	else
> +		__bitmap_clear(map, start, nbits);
> +}
> +
Tetsuo Handa Nov. 30, 2017, 1:35 p.m. UTC | #2
Tetsuo Handa wrote:
> > +
> > +			if (ebit >= BITS_PER_LONG)
> > +				continue;
> 
> (I don't understand how the radix tree works, but generally this patchset looks fuzzy
> to me about boundary cases. Thus, I want to confirm that this is not an oversight.)
> Why is making the "ebit >= BITS_PER_LONG" case (e.g. start == 62) a no-op correct?
> Aren't there bits which should have been cleared in this case?

According to xb_set_bit(), it seems to me that we are trying to avoid memory allocation
for "struct ida_bitmap" when all set bits within a 1024-bit bitmap reside in the first
61 bits.

But does such a saving help? Is there a characteristic bias such that the majority of set
bits reside in the first 61 bits, given that "bit" is an "unsigned long" which holds a
page number (isn't it)?
If there is no such bias, wouldn't eliminating the radix_tree_exception() case and always
storing "struct ida_bitmap" simplify the code (and make the processing faster)?
Matthew Wilcox Nov. 30, 2017, 2:39 p.m. UTC | #3
On Thu, Nov 30, 2017 at 10:35:03PM +0900, Tetsuo Handa wrote:
> According to xb_set_bit(), it seems to me that we are trying to avoid memory allocation
> for "struct ida_bitmap" when all set bits within a 1024-bit bitmap reside in the first
> 61 bits.
> 
> But does such a saving help? Is there a characteristic bias such that the majority of set
> bits reside in the first 61 bits, given that "bit" is an "unsigned long" which holds a
> page number (isn't it)?
> If there is no such bias, wouldn't eliminating the radix_tree_exception() case and always
> storing "struct ida_bitmap" simplify the code (and make the processing faster)?

It happens all the time.  The vast majority of users of the IDA set
low bits.  Also, it's the first 62 bits -- going up to 63 bits with the
XArray rewrite.

I do plan to redo the xbitmap on top of the XArray; I'm just trying to
get the XArray merged first.  The IDA and xbitmap code will share much
more code when that happens.
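
For reference, the encoding being discussed here, as it appears in xb_set_bit() in this
series (a sketch for orientation, not authoritative): when only low bits are used, the
radix tree slot stores the bits inline as an exceptional entry instead of pointing to a
kmalloc'ed struct ida_bitmap. The low two bits of the word are reserved for radix tree
entry tags, so xbitmap bit N lives at word bit N + 2 ("ebit = bit + 2"), leaving room
for bits 0..61:

	if (ebit < BITS_PER_LONG) {	/* the bit fits in the inline word */
		bitmap = (void *)((1UL << ebit) | RADIX_TREE_EXCEPTIONAL_ENTRY);
		__radix_tree_replace(root, node, slot, bitmap, NULL);
		return 0;
	}
	/* otherwise fall back to a kmalloc'ed struct ida_bitmap */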
Tetsuo Handa Dec. 1, 2017, 1:02 p.m. UTC | #4
Wei Wang wrote:
> On 11/30/2017 06:34 PM, Tetsuo Handa wrote:
> > Wei Wang wrote:
> >> + * @start: the start of the bit range, inclusive
> >> + * @end: the end of the bit range, inclusive
> >> + *
> >> + * This function is used to clear a bit in the xbitmap. If all the bits of the
> >> + * bitmap are 0, the bitmap will be freed.
> >> + */
> >> +void xb_clear_bit_range(struct xb *xb, unsigned long start, unsigned long end)
> >> +{
> >> +	struct radix_tree_root *root = &xb->xbrt;
> >> +	struct radix_tree_node *node;
> >> +	void **slot;
> >> +	struct ida_bitmap *bitmap;
> >> +	unsigned int nbits;
> >> +
> >> +	for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
> >> +		unsigned long index = start / IDA_BITMAP_BITS;
> >> +		unsigned long bit = start % IDA_BITMAP_BITS;
> >> +
> >> +		bitmap = __radix_tree_lookup(root, index, &node, &slot);
> >> +		if (radix_tree_exception(bitmap)) {
> >> +			unsigned long ebit = bit + 2;
> >> +			unsigned long tmp = (unsigned long)bitmap;
> >> +
> >> +			nbits = min(end - start + 1, BITS_PER_LONG - ebit);
> > "nbits = min(end - start + 1," seems to expect that start == end is legal
> > for clearing only 1 bit. But this function is no-op if start == end.
> > Please clarify what "inclusive" intended.
> 
> If xb_clear_bit_range(xb,10,10), then it is effectively the same as 
> xb_clear_bit(10). Why would it be illegal?
> 
> "@start inclusive" means that the @start will also be included to be 
> cleared.

If start == end is legal,

   for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {

makes this loop do nothing because 10 < 10 is false.



> 
> >
> >> +static inline __always_inline void bitmap_clear(unsigned long *map,
> >> +						unsigned int start,
> >> +						unsigned int nbits)
> >> +{
> >> +	if (__builtin_constant_p(nbits) && nbits == 1)
> >> +		__clear_bit(start, map);
> >> +	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
> >> +		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
> > It looks strange to apply __builtin_constant_p test to variables after "& 7".
> >
> 
> I think this is normal - if the variables are known at compile time, the 
> calculation will be done at compile time (termed constant folding).

I think that

+	else if (__builtin_constant_p(start) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits) && IS_ALIGNED(nbits, 8))

is more readable.
Matthew Wilcox Dec. 1, 2017, 2:13 p.m. UTC | #5
On Fri, Dec 01, 2017 at 10:02:01PM +0900, Tetsuo Handa wrote:
> If start == end is legal,
> 
>    for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
> 
> makes this loop do nothing because 10 < 10 is false.

... and this is why we add tests to the test-suite!
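
A sketch of the kind of test this suggests (a hypothetical addition to
xbitmap_check_bit_range() in this patch; with the current "start < end" loop condition
the clear is a no-op, so the last assertion fails):

	assert(!xb_preload_and_set_bit(&xb1, 10, GFP_KERNEL));
	xb_clear_bit_range(&xb1, 10, 10);	/* intended to clear just bit 10 */
	assert(!xb_test_bit(&xb1, 10));		/* fails: 10 < 10 skips the loop body */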
Wang, Wei W Dec. 1, 2017, 3:09 p.m. UTC | #6
On Friday, December 1, 2017 9:02 PM, Tetsuo Handa wrote:
> Wei Wang wrote:
> > On 11/30/2017 06:34 PM, Tetsuo Handa wrote:
> > > Wei Wang wrote:
> > >> + * @start: the start of the bit range, inclusive
> > >> + * @end: the end of the bit range, inclusive
> > >> + *
> > >> + * This function is used to clear a bit in the xbitmap. If all the
> > >> +bits of the
> > >> + * bitmap are 0, the bitmap will be freed.
> > >> + */
> > >> +void xb_clear_bit_range(struct xb *xb, unsigned long start,
> > >> +unsigned long end) {
> > >> +	struct radix_tree_root *root = &xb->xbrt;
> > >> +	struct radix_tree_node *node;
> > >> +	void **slot;
> > >> +	struct ida_bitmap *bitmap;
> > >> +	unsigned int nbits;
> > >> +
> > >> +	for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
> > >> +		unsigned long index = start / IDA_BITMAP_BITS;
> > >> +		unsigned long bit = start % IDA_BITMAP_BITS;
> > >> +
> > >> +		bitmap = __radix_tree_lookup(root, index, &node, &slot);
> > >> +		if (radix_tree_exception(bitmap)) {
> > >> +			unsigned long ebit = bit + 2;
> > >> +			unsigned long tmp = (unsigned long)bitmap;
> > >> +
> > >> +			nbits = min(end - start + 1, BITS_PER_LONG - ebit);
> > > "nbits = min(end - start + 1," seems to expect that start == end is
> > > legal for clearing only 1 bit. But this function is no-op if start == end.
> > > Please clarify what "inclusive" intended.
> >
> > If xb_clear_bit_range(xb,10,10), then it is effectively the same as
> > xb_clear_bit(10). Why would it be illegal?
> >
> > "@start inclusive" means that the @start will also be included to be
> > cleared.
> 
> If start == end is legal,
> 
>    for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
> 
> makes this loop do nothing because 10 < 10 is false.


How about "start <= end "?

Best,
Wei
Matthew Wilcox Dec. 1, 2017, 5:25 p.m. UTC | #7
On Fri, Dec 01, 2017 at 03:09:08PM +0000, Wang, Wei W wrote:
> On Friday, December 1, 2017 9:02 PM, Tetsuo Handa wrote:
> > If start == end is legal,
> > 
> >    for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
> > 
> > makes this loop do nothing because 10 < 10 is false.
> 
> How about "start <= end "?

Don't ask Tetsuo for his opinion, write some userspace code that uses it.
Tetsuo Handa Dec. 3, 2017, 1:44 a.m. UTC | #8
Matthew Wilcox wrote:
> On Thu, Nov 30, 2017 at 10:35:03PM +0900, Tetsuo Handa wrote:
> > According to xb_set_bit(), it seems to me that we are trying to avoid memory allocation
> > for "struct ida_bitmap" when all set bits within a 1024-bit bitmap reside in the first
> > 61 bits.
> > 
> > But does such a saving help? Is there a characteristic bias such that the majority of set
> > bits reside in the first 61 bits, given that "bit" is an "unsigned long" which holds a
> > page number (isn't it)?
> > If there is no such bias, wouldn't eliminating the radix_tree_exception() case and always
> > storing "struct ida_bitmap" simplify the code (and make the processing faster)?
> 
> It happens all the time.  The vast majority of users of the IDA set
> low bits.  Also, it's the first 62 bits -- going up to 63 bits with the
> XArray rewrite.

Oops, 0...61 is 62 bits.

What I meant is the below (untested) patch. If it is correct, it can save lines and
make the code easier to read.

 lib/radix-tree.c | 20 +------------
 lib/xbitmap.c    | 88 +++++---------------------------------------------------
 2 files changed, 8 insertions(+), 100 deletions(-)

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index a039588..fb84b91 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2152,25 +2152,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
  */
 __must_check bool xb_preload(gfp_t gfp)
 {
-	if (!this_cpu_read(ida_bitmap)) {
-		struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
-
-		if (!bitmap)
-			return false;
-		/*
-		 * The per-CPU variable is updated with preemption enabled.
-		 * If the calling task is unlucky to be scheduled to another
-		 * CPU which has no ida_bitmap allocation, it will be detected
-		 * when setting a bit (i.e. __xb_set_bit()).
-		 */
-		bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
-		kfree(bitmap);
-	}
-
-	if (__radix_tree_preload(gfp, XB_PRELOAD_SIZE) < 0)
-		return false;
-
-	return true;
+	return __radix_tree_preload(gfp, XB_PRELOAD_SIZE) == 0;
 }
 EXPORT_SYMBOL(xb_preload);
 
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
index 816dd3e..426d168 100644
--- a/lib/xbitmap.c
+++ b/lib/xbitmap.c
@@ -18,7 +18,7 @@
  * This function is used to set a bit in the xbitmap. If the bitmap that @bit
  * resides in is not there, the per-cpu ida_bitmap will be taken.
  *
- * Returns: 0 on success. %-EAGAIN indicates that @bit was not set.
+ * Returns: 0 on success. Negative value on failure.
  */
 int xb_set_bit(struct xb *xb, unsigned long bit)
 {
@@ -28,46 +28,19 @@ int xb_set_bit(struct xb *xb, unsigned long bit)
 	struct radix_tree_node *node;
 	void **slot;
 	struct ida_bitmap *bitmap;
-	unsigned long ebit;
 
 	bit %= IDA_BITMAP_BITS;
-	ebit = bit + 2;
 
 	err = __radix_tree_create(root, index, 0, &node, &slot);
 	if (err)
 		return err;
 	bitmap = rcu_dereference_raw(*slot);
-	if (radix_tree_exception(bitmap)) {
-		unsigned long tmp = (unsigned long)bitmap;
-
-		if (ebit < BITS_PER_LONG) {
-			tmp |= 1UL << ebit;
-			rcu_assign_pointer(*slot, (void *)tmp);
-			return 0;
-		}
-		bitmap = this_cpu_xchg(ida_bitmap, NULL);
-		if (!bitmap) {
-			__radix_tree_delete(root, node, slot);
-			return -EAGAIN;
-		}
-		memset(bitmap, 0, sizeof(*bitmap));
-		bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
-		rcu_assign_pointer(*slot, bitmap);
-	}
-
 	if (!bitmap) {
-		if (ebit < BITS_PER_LONG) {
-			bitmap = (void *)((1UL << ebit) |
-					RADIX_TREE_EXCEPTIONAL_ENTRY);
-			__radix_tree_replace(root, node, slot, bitmap, NULL);
-			return 0;
-		}
-		bitmap = this_cpu_xchg(ida_bitmap, NULL);
+		bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT | __GFP_NOWARN);
 		if (!bitmap) {
 			__radix_tree_delete(root, node, slot);
-			return -EAGAIN;
+			return -ENOMEM;
 		}
-		memset(bitmap, 0, sizeof(*bitmap));
 		__radix_tree_replace(root, node, slot, bitmap, NULL);
 	}
 
@@ -82,7 +55,7 @@ int xb_set_bit(struct xb *xb, unsigned long bit)
  *  @bit: index of the bit to set
  *
  * A wrapper of the xb_preload() and xb_set_bit().
- * Returns: 0 on success; -EAGAIN or -ENOMEM on error.
+ * Returns: 0 on success; -ENOMEM on error.
  */
 int xb_preload_and_set_bit(struct xb *xb, unsigned long bit, gfp_t gfp)
 {
@@ -113,25 +86,10 @@ void xb_clear_bit(struct xb *xb, unsigned long bit)
 	struct radix_tree_node *node;
 	void **slot;
 	struct ida_bitmap *bitmap;
-	unsigned long ebit;
 
 	bit %= IDA_BITMAP_BITS;
-	ebit = bit + 2;
 
 	bitmap = __radix_tree_lookup(root, index, &node, &slot);
-	if (radix_tree_exception(bitmap)) {
-		unsigned long tmp = (unsigned long)bitmap;
-
-		if (ebit >= BITS_PER_LONG)
-			return;
-		tmp &= ~(1UL << ebit);
-		if (tmp == RADIX_TREE_EXCEPTIONAL_ENTRY)
-			__radix_tree_delete(root, node, slot);
-		else
-			rcu_assign_pointer(*slot, (void *)tmp);
-		return;
-	}
-
 	if (!bitmap)
 		return;
 
@@ -164,20 +122,7 @@ void xb_clear_bit_range(struct xb *xb, unsigned long start, unsigned long end)
 		unsigned long bit = start % IDA_BITMAP_BITS;
 
 		bitmap = __radix_tree_lookup(root, index, &node, &slot);
-		if (radix_tree_exception(bitmap)) {
-			unsigned long ebit = bit + 2;
-			unsigned long tmp = (unsigned long)bitmap;
-
-			nbits = min(end - start + 1, BITS_PER_LONG - ebit);
-
-			if (ebit >= BITS_PER_LONG)
-				continue;
-			bitmap_clear(&tmp, ebit, nbits);
-			if (tmp == RADIX_TREE_EXCEPTIONAL_ENTRY)
-				__radix_tree_delete(root, node, slot);
-			else
-				rcu_assign_pointer(*slot, (void *)tmp);
-		} else if (bitmap) {
+		if (bitmap) {
 			nbits = min(end - start + 1, IDA_BITMAP_BITS - bit);
 
 			if (nbits != IDA_BITMAP_BITS)
@@ -212,12 +157,6 @@ bool xb_test_bit(const struct xb *xb, unsigned long bit)
 
 	if (!bitmap)
 		return false;
-	if (radix_tree_exception(bitmap)) {
-		bit += RADIX_TREE_EXCEPTIONAL_SHIFT;
-		if (bit >= BITS_PER_LONG)
-			return false;
-		return (unsigned long)bitmap & (1UL << bit);
-	}
 	return test_bit(bit, bitmap->bitmap);
 }
 EXPORT_SYMBOL(xb_test_bit);
@@ -236,20 +175,7 @@ static unsigned long xb_find_next_bit(struct xb *xb, unsigned long start,
 		unsigned long bit = start % IDA_BITMAP_BITS;
 
 		bmap = __radix_tree_lookup(root, index, &node, &slot);
-		if (radix_tree_exception(bmap)) {
-			unsigned long tmp = (unsigned long)bmap;
-			unsigned long ebit = bit + 2;
-
-			if (ebit >= BITS_PER_LONG)
-				continue;
-			if (set)
-				ret = find_next_bit(&tmp, BITS_PER_LONG, ebit);
-			else
-				ret = find_next_zero_bit(&tmp, BITS_PER_LONG,
-							 ebit);
-			if (ret < BITS_PER_LONG)
-				return ret - 2 + IDA_BITMAP_BITS * index;
-		} else if (bmap) {
+		if (bmap) {
 			if (set)
 				ret = find_next_bit(bmap->bitmap,
 						    IDA_BITMAP_BITS, bit);
@@ -258,7 +184,7 @@ static unsigned long xb_find_next_bit(struct xb *xb, unsigned long start,
 							 IDA_BITMAP_BITS, bit);
 			if (ret < IDA_BITMAP_BITS)
 				return ret + index * IDA_BITMAP_BITS;
-		} else if (!bmap && !set) {
+		} else if (!set) {
 			return start;
 		}
 	}
--
Tetsuo Handa Dec. 3, 2017, 1:50 a.m. UTC | #9
Matthew Wilcox wrote:
> On Fri, Dec 01, 2017 at 03:09:08PM +0000, Wang, Wei W wrote:
> > On Friday, December 1, 2017 9:02 PM, Tetsuo Handa wrote:
> > > If start == end is legal,
> > > 
> > >    for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
> > > 
> > > makes this loop do nothing because 10 < 10 is false.
> > 
> > How about "start <= end "?
> 
> Don't ask Tetsuo for his opinion, write some userspace code that uses it.
> 

Please be sure to prepare for the "end == -1UL" case, for "start < end" will become
true again when "start = (start | (IDA_BITMAP_BITS - 1)) + 1" makes "start == 0" due to
overflow.
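
For example (with IDA_BITMAP_BITS == 1024, per the 1024-bit chunks discussed above), if
end == ULONG_MAX and start lies anywhere in the last 1024-bit chunk, the loop increment

	start = (start | (IDA_BITMAP_BITS - 1)) + 1;	/* start | 1023 == ULONG_MAX, + 1 wraps to 0 */

makes start wrap to 0, so "start < end" holds again and the loop restarts from index 0
instead of terminating.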
Wang, Wei W Dec. 7, 2017, 12:01 p.m. UTC | #10
On 12/03/2017 09:50 AM, Tetsuo Handa wrote:
> Matthew Wilcox wrote:
>> On Fri, Dec 01, 2017 at 03:09:08PM +0000, Wang, Wei W wrote:
>>> On Friday, December 1, 2017 9:02 PM, Tetsuo Handa wrote:
>>>> If start == end is legal,
>>>>
>>>>     for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
>>>>
>>>> makes this loop do nothing because 10 < 10 is false.
>>> How about "start <= end "?
>> Don't ask Tetsuo for his opinion, write some userspace code that uses it.
>>
> Please be sure to prepare for the "end == -1UL" case, for "start < end" will become
> true again when "start = (start | (IDA_BITMAP_BITS - 1)) + 1" makes "start == 0" due to
> overflow.

I think there is one more corner case with this API: when searching for a
"1" bit in [0, ULONG_MAX] while no bit is set in the range, there appears
to be no possible value that we can return (returning "end + 1" would be
"ULONG_MAX + 1", which is 0).
I plan to make "end" exclusive in the search, that is, [start, end), and
return "end" if no such bit is found.

For cases like [16, 16), returning 16 doesn't mean bit 16 is 1 or 0; it
simply means there are no bits to search in the given range, since 16 is
exclusive.

Please let me know if you have a different thought.

Best,
Wei
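
A minimal sketch of the proposed [start, end) behavior (hypothetical; the patch as
posted uses an inclusive @end):

	/* @end is excluded from the search and doubles as the "not found" value. */
	xb_clear_bit_range(&xb1, 0, ULONG_MAX);				/* clears bits 0 .. ULONG_MAX - 1 */
	assert(xb_find_next_set_bit(&xb1, 0, ULONG_MAX) == ULONG_MAX);	/* nothing set: not found */
	assert(xb_find_next_set_bit(&xb1, 16, 16) == 16);		/* empty range [16, 16) */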
Michael S. Tsirkin Dec. 7, 2017, 3:41 p.m. UTC | #11
On Thu, Dec 07, 2017 at 08:01:24PM +0800, Wei Wang wrote:
> On 12/03/2017 09:50 AM, Tetsuo Handa wrote:
> > Matthew Wilcox wrote:
> > > On Fri, Dec 01, 2017 at 03:09:08PM +0000, Wang, Wei W wrote:
> > > > On Friday, December 1, 2017 9:02 PM, Tetsuo Handa wrote:
> > > > > If start == end is legal,
> > > > > 
> > > > >     for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
> > > > > 
> > > > > makes this loop do nothing because 10 < 10 is false.
> > > > How about "start <= end "?
> > > Don't ask Tetsuo for his opinion, write some userspace code that uses it.
> > > 
> > Please be sure to prepare for the "end == -1UL" case, for "start < end" will become
> > true again when "start = (start | (IDA_BITMAP_BITS - 1)) + 1" makes "start == 0" due to
> > overflow.
> 
> I think there is one more corner case with this API: when searching for a "1"
> bit in [0, ULONG_MAX] while no bit is set in the range, there appears to be no
> possible value that we can return (returning "end + 1" would be "ULONG_MAX +
> 1", which is 0).
> I plan to make "end" exclusive in the search, that is, [start, end), and
> return "end" if no such bit is found.
> 
> For cases like [16, 16), returning 16 doesn't mean bit 16 is 1 or 0; it
> simply means there are no bits to search in the given range, since 16 is
> exclusive.
> 
> Please let me know if you have a different thought.
> 
> Best,
> Wei

Matthew is right though - you want to include tests for all
these corner cases.
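
A sketch of the additional corner cases being referred to (hypothetical assertions for
xbitmap_check_bit_range(); with the current code the second line fails because
"start < end" never examines bit 1000, and the full-range clear does not terminate
because "start" wraps past ULONG_MAX to 0):

	assert(!xb_preload_and_set_bit(&xb1, 1000, GFP_KERNEL));
	assert(xb_find_next_set_bit(&xb1, 1000, 1000) == 1000);	/* start == end */
	xb_clear_bit_range(&xb1, 0, ULONG_MAX);				/* end == ULONG_MAX */
	assert(xb_find_next_set_bit(&xb1, 0, 2000) == 2001);		/* nothing left set */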

Patch

diff --git a/include/linux/xbitmap.h b/include/linux/xbitmap.h
index b4d8375..eddf0d5e 100644
--- a/include/linux/xbitmap.h
+++ b/include/linux/xbitmap.h
@@ -33,8 +33,14 @@  static inline void xb_init(struct xb *xb)
 }
 
 int xb_set_bit(struct xb *xb, unsigned long bit);
+int xb_preload_and_set_bit(struct xb *xb, unsigned long bit, gfp_t gfp);
 bool xb_test_bit(const struct xb *xb, unsigned long bit);
-int xb_clear_bit(struct xb *xb, unsigned long bit);
+void xb_clear_bit(struct xb *xb, unsigned long bit);
+unsigned long xb_find_next_set_bit(struct xb *xb, unsigned long start,
+				   unsigned long end);
+unsigned long xb_find_next_zero_bit(struct xb *xb, unsigned long start,
+				    unsigned long end);
+void xb_clear_bit_range(struct xb *xb, unsigned long start, unsigned long end);
 
 static inline bool xb_empty(const struct xb *xb)
 {
diff --git a/lib/xbitmap.c b/lib/xbitmap.c
index 182aa29..816dd3e 100644
--- a/lib/xbitmap.c
+++ b/lib/xbitmap.c
@@ -3,6 +3,13 @@ 
 #include <linux/bitmap.h>
 #include <linux/slab.h>
 
+/*
+ * Developer notes: locks are required to guarantee there are no concurrent
+ * calls to xb_set_bit, xb_clear_bit, xb_clear_bit_range, xb_test_bit,
+ * xb_find_next_set_bit, or xb_find_next_zero_bit operating on the same
+ * ida bitmap.
+ */
+
 /**
  *  xb_set_bit - set a bit in the xbitmap
  *  @xb: the xbitmap tree used to record the bit
@@ -70,6 +77,28 @@  int xb_set_bit(struct xb *xb, unsigned long bit)
 EXPORT_SYMBOL(xb_set_bit);
 
 /**
+ *  xb_preload_and_set_bit - preload the memory and set a bit in the xbitmap
+ *  @xb: the xbitmap tree used to record the bit
+ *  @bit: index of the bit to set
+ *
+ * A wrapper of the xb_preload() and xb_set_bit().
+ * Returns: 0 on success; -EAGAIN or -ENOMEM on error.
+ */
+int xb_preload_and_set_bit(struct xb *xb, unsigned long bit, gfp_t gfp)
+{
+	int ret = 0;
+
+	if (!xb_preload(gfp))
+		return -ENOMEM;
+
+	ret = xb_set_bit(xb, bit);
+	xb_preload_end();
+
+	return ret;
+}
+EXPORT_SYMBOL(xb_preload_and_set_bit);
+
+/**
  * xb_clear_bit - clear a bit in the xbitmap
  * @xb: the xbitmap tree used to record the bit
  * @bit: index of the bit to clear
@@ -115,6 +144,56 @@  void xb_clear_bit(struct xb *xb, unsigned long bit)
 EXPORT_SYMBOL(xb_clear_bit);
 
 /**
+ * xb_clear_bit - clear a range of bits in the xbitmap
+ * @start: the start of the bit range, inclusive
+ * @end: the end of the bit range, inclusive
+ *
+ * This function is used to clear a bit in the xbitmap. If all the bits of the
+ * bitmap are 0, the bitmap will be freed.
+ */
+void xb_clear_bit_range(struct xb *xb, unsigned long start, unsigned long end)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_node *node;
+	void **slot;
+	struct ida_bitmap *bitmap;
+	unsigned int nbits;
+
+	for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
+		unsigned long index = start / IDA_BITMAP_BITS;
+		unsigned long bit = start % IDA_BITMAP_BITS;
+
+		bitmap = __radix_tree_lookup(root, index, &node, &slot);
+		if (radix_tree_exception(bitmap)) {
+			unsigned long ebit = bit + 2;
+			unsigned long tmp = (unsigned long)bitmap;
+
+			nbits = min(end - start + 1, BITS_PER_LONG - ebit);
+
+			if (ebit >= BITS_PER_LONG)
+				continue;
+			bitmap_clear(&tmp, ebit, nbits);
+			if (tmp == RADIX_TREE_EXCEPTIONAL_ENTRY)
+				__radix_tree_delete(root, node, slot);
+			else
+				rcu_assign_pointer(*slot, (void *)tmp);
+		} else if (bitmap) {
+			nbits = min(end - start + 1, IDA_BITMAP_BITS - bit);
+
+			if (nbits != IDA_BITMAP_BITS)
+				bitmap_clear(bitmap->bitmap, bit, nbits);
+
+			if (nbits == IDA_BITMAP_BITS ||
+			    bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+				kfree(bitmap);
+				__radix_tree_delete(root, node, slot);
+			}
+		}
+	}
+}
+EXPORT_SYMBOL(xb_clear_bit_range);
+
+/**
  * xb_test_bit - test a bit in the xbitmap
  * @xb: the xbitmap tree used to record the bit
  * @bit: index of the bit to test
@@ -143,6 +222,80 @@  bool xb_test_bit(const struct xb *xb, unsigned long bit)
 }
 EXPORT_SYMBOL(xb_test_bit);
 
+static unsigned long xb_find_next_bit(struct xb *xb, unsigned long start,
+				      unsigned long end, bool set)
+{
+	struct radix_tree_root *root = &xb->xbrt;
+	struct radix_tree_node *node;
+	void **slot;
+	struct ida_bitmap *bmap;
+	unsigned long ret = end + 1;
+
+	for (; start < end; start = (start | (IDA_BITMAP_BITS - 1)) + 1) {
+		unsigned long index = start / IDA_BITMAP_BITS;
+		unsigned long bit = start % IDA_BITMAP_BITS;
+
+		bmap = __radix_tree_lookup(root, index, &node, &slot);
+		if (radix_tree_exception(bmap)) {
+			unsigned long tmp = (unsigned long)bmap;
+			unsigned long ebit = bit + 2;
+
+			if (ebit >= BITS_PER_LONG)
+				continue;
+			if (set)
+				ret = find_next_bit(&tmp, BITS_PER_LONG, ebit);
+			else
+				ret = find_next_zero_bit(&tmp, BITS_PER_LONG,
+							 ebit);
+			if (ret < BITS_PER_LONG)
+				return ret - 2 + IDA_BITMAP_BITS * index;
+		} else if (bmap) {
+			if (set)
+				ret = find_next_bit(bmap->bitmap,
+						    IDA_BITMAP_BITS, bit);
+			else
+				ret = find_next_zero_bit(bmap->bitmap,
+							 IDA_BITMAP_BITS, bit);
+			if (ret < IDA_BITMAP_BITS)
+				return ret + index * IDA_BITMAP_BITS;
+		} else if (!bmap && !set) {
+			return start;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * xb_find_next_set_bit - find the next set bit in a range
+ * @xb: the xbitmap to search
+ * @start: the start of the range, inclusive
+ * @end: the end of the range, inclusive
+ *
+ * Returns: the index of the found bit, or @end + 1 if no such bit is found.
+ */
+unsigned long xb_find_next_set_bit(struct xb *xb, unsigned long start,
+				   unsigned long end)
+{
+	return xb_find_next_bit(xb, start, end, 1);
+}
+EXPORT_SYMBOL(xb_find_next_set_bit);
+
+/**
+ * xb_find_next_zero_bit - find the next zero bit in a range
+ * @xb: the xbitmap to search
+ * @start: the start of the range, inclusive
+ * @end: the end of the range, inclusive
+ *
+ * Returns: the index of the found bit, or @end + 1 if no such bit is found.
+ */
+unsigned long xb_find_next_zero_bit(struct xb *xb, unsigned long start,
+				    unsigned long end)
+{
+	return xb_find_next_bit(xb, start, end, 0);
+}
+EXPORT_SYMBOL(xb_find_next_zero_bit);
+
 #ifndef __KERNEL__
 
 static DEFINE_XB(xb1);
@@ -160,6 +313,31 @@  void xbitmap_check_bit(unsigned long bit)
 	xb_preload_end();
 }
 
+static void xbitmap_check_bit_range(void)
+{
+	/* Set a range of bits */
+	assert(!xb_preload_and_set_bit(&xb1, 1060, GFP_KERNEL));
+	assert(!xb_preload_and_set_bit(&xb1, 1061, GFP_KERNEL));
+	assert(!xb_preload_and_set_bit(&xb1, 1064, GFP_KERNEL));
+	assert(!xb_preload_and_set_bit(&xb1, 1065, GFP_KERNEL));
+	assert(!xb_preload_and_set_bit(&xb1, 8180, GFP_KERNEL));
+	assert(!xb_preload_and_set_bit(&xb1, 8181, GFP_KERNEL));
+	assert(!xb_preload_and_set_bit(&xb1, 8190, GFP_KERNEL));
+	assert(!xb_preload_and_set_bit(&xb1, 8191, GFP_KERNEL));
+
+	/* Test a range of bits */
+	assert(xb_find_next_set_bit(&xb1, 0, 10000) == 1060);
+	assert(xb_find_next_zero_bit(&xb1, 1061, 10000) == 1062);
+	assert(xb_find_next_set_bit(&xb1, 1062, 10000) == 1064);
+	assert(xb_find_next_zero_bit(&xb1, 1065, 10000) == 1066);
+	assert(xb_find_next_set_bit(&xb1, 1066, 10000) == 8180);
+	assert(xb_find_next_zero_bit(&xb1, 8180, 10000) == 8182);
+	xb_clear_bit_range(&xb1, 0, 20000);
+	assert(xb_find_next_set_bit(&xb1, 0, 10000) == 10001);
+
+	assert(xb_find_next_zero_bit(&xb1, 20000, 30000) == 20000);
+}
+
 void xbitmap_checks(void)
 {
 	xb_init(&xb1);
@@ -171,6 +349,8 @@  void xbitmap_checks(void)
 	xbitmap_check_bit(1025);
 	xbitmap_check_bit((1UL << 63) | (1UL << 24));
 	xbitmap_check_bit((1UL << 63) | (1UL << 24) | 70);
+
+	xbitmap_check_bit_range();
 }
 
 int __weak main(void)
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index ca16027..8d0bc1b 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -37,6 +37,40 @@  static inline void bitmap_zero(unsigned long *dst, int nbits)
 	}
 }
 
+static inline void __bitmap_clear(unsigned long *map, unsigned int start,
+				  int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+
+static inline __always_inline void bitmap_clear(unsigned long *map,
+						unsigned int start,
+						unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
 {
 	unsigned int nlongs = BITS_TO_LONGS(nbits);
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad8844..3c992ae 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -13,6 +13,8 @@ 
 #define UINT_MAX	(~0U)
 #endif
 
+#define IS_ALIGNED(x, a)	(((x) & ((typeof(x))(a) - 1)) == 0)
+
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
 #define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)