diff mbox series

[02/18] block: open code kobj_map into in block/genhd.c

Message ID 20201029145841.144173-3-hch@lst.de
State New
Headers show
Series [01/18] block: cleanup del_gendisk a bit | expand

Commit Message

Christoph Hellwig Oct. 29, 2020, 2:58 p.m. UTC
Copy and paste the kobj_map functionality in the block code in preparation
for completely rewriting it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/genhd.c | 130 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 117 insertions(+), 13 deletions(-)

Comments

Greg Kroah-Hartman Oct. 29, 2020, 7:22 p.m. UTC | #1
On Thu, Oct 29, 2020 at 03:58:25PM +0100, Christoph Hellwig wrote:
> Copy and paste the kobj_map functionality in the block code in preparation
> for completely rewriting it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Yes!

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

After this, you want me to get rid of kobj_map, right?  Or you don't
care as block doesn't use it anymore?  :)

thanks,

greg k-h
Christoph Hellwig Oct. 29, 2020, 7:32 p.m. UTC | #2
On Thu, Oct 29, 2020 at 08:22:36PM +0100, Greg Kroah-Hartman wrote:
> After this, you want me to get rid of kobj_map, right?  Or you don't
> care as block doesn't use it anymore?  :)

I have a patch to kill it, but it causes odd regressions with the
tpm driver according to the kernel test.  As I have grand plans that
build on the block ѕide of this series for 5.11, I plan to defer the
chardev side and address it for 5.12.
Greg Kroah-Hartman Oct. 30, 2020, 10:40 a.m. UTC | #3
On Thu, Oct 29, 2020 at 08:32:42PM +0100, Christoph Hellwig wrote:
> On Thu, Oct 29, 2020 at 08:22:36PM +0100, Greg Kroah-Hartman wrote:
> > After this, you want me to get rid of kobj_map, right?  Or you don't
> > care as block doesn't use it anymore?  :)
> 
> I have a patch to kill it, but it causes odd regressions with the
> tpm driver according to the kernel test.  As I have grand plans that
> build on the block ѕide of this series for 5.11, I plan to defer the
> chardev side and address it for 5.12.

Ok, sounds good.

Wow, I just looked at the tpm code, and it is, um, "interesting" in how
it thinks device lifespans work.  Nothing like having 4 different
structures with different lifespans embedded within a single structure.
Good thing that no one can dynamically remove a TPM device during
"normal" operation.

greg k-h
Geert Uytterhoeven Oct. 30, 2020, 10:49 a.m. UTC | #4
Hi Greg,

On Fri, Oct 30, 2020 at 11:40 AM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
> On Thu, Oct 29, 2020 at 08:32:42PM +0100, Christoph Hellwig wrote:
> > On Thu, Oct 29, 2020 at 08:22:36PM +0100, Greg Kroah-Hartman wrote:
> > > After this, you want me to get rid of kobj_map, right?  Or you don't
> > > care as block doesn't use it anymore?  :)
> >
> > I have a patch to kill it, but it causes odd regressions with the
> > tpm driver according to the kernel test.  As I have grand plans that
> > build on the block ѕide of this series for 5.11, I plan to defer the
> > chardev side and address it for 5.12.
>
> Ok, sounds good.
>
> Wow, I just looked at the tpm code, and it is, um, "interesting" in how
> it thinks device lifespans work.  Nothing like having 4 different
> structures with different lifespans embedded within a single structure.
> Good thing that no one can dynamically remove a TPM device during
> "normal" operation.

/sys/.../unbind?

Gr{oetje,eeting}s,

                        Geert
Greg Kroah-Hartman Oct. 30, 2020, 11:06 a.m. UTC | #5
On Fri, Oct 30, 2020 at 11:49:11AM +0100, Geert Uytterhoeven wrote:
> Hi Greg,
> 
> On Fri, Oct 30, 2020 at 11:40 AM Greg Kroah-Hartman
> <gregkh@linuxfoundation.org> wrote:
> > On Thu, Oct 29, 2020 at 08:32:42PM +0100, Christoph Hellwig wrote:
> > > On Thu, Oct 29, 2020 at 08:22:36PM +0100, Greg Kroah-Hartman wrote:
> > > > After this, you want me to get rid of kobj_map, right?  Or you don't
> > > > care as block doesn't use it anymore?  :)
> > >
> > > I have a patch to kill it, but it causes odd regressions with the
> > > tpm driver according to the kernel test.  As I have grand plans that
> > > build on the block ѕide of this series for 5.11, I plan to defer the
> > > chardev side and address it for 5.12.
> >
> > Ok, sounds good.
> >
> > Wow, I just looked at the tpm code, and it is, um, "interesting" in how
> > it thinks device lifespans work.  Nothing like having 4 different
> > structures with different lifespans embedded within a single structure.
> > Good thing that no one can dynamically remove a TPM device during
> > "normal" operation.
> 
> /sys/.../unbind?

I said "normal" operations :)

Anyone who uses unbind and is suprised when things go "boom" is naive.

thanks,

greg k-h
Christoph Hellwig Oct. 31, 2020, 8:53 a.m. UTC | #6
On Fri, Oct 30, 2020 at 11:40:33AM +0100, Greg Kroah-Hartman wrote:
> On Thu, Oct 29, 2020 at 08:32:42PM +0100, Christoph Hellwig wrote:
> > On Thu, Oct 29, 2020 at 08:22:36PM +0100, Greg Kroah-Hartman wrote:
> > > After this, you want me to get rid of kobj_map, right?  Or you don't
> > > care as block doesn't use it anymore?  :)
> > 
> > I have a patch to kill it, but it causes odd regressions with the
> > tpm driver according to the kernel test.  As I have grand plans that
> > build on the block ѕide of this series for 5.11, I plan to defer the
> > chardev side and address it for 5.12.
> 
> Ok, sounds good.
> 
> Wow, I just looked at the tpm code, and it is, um, "interesting" in how
> it thinks device lifespans work.  Nothing like having 4 different
> structures with different lifespans embedded within a single structure.
> Good thing that no one can dynamically remove a TPM device during
> "normal" operation.

The regressions were during suspend then the tpm gets removed.  In
fact I'm pretty sure it is an existing problem that the change in the
lookup just surfaced in a way that the test bot notices, but I didn't
want to guard the block changes on it.
diff mbox series

Patch

diff --git a/block/genhd.c b/block/genhd.c
index 3ed591970ea640..0e22ca64aca09e 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -17,7 +17,6 @@ 
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/kmod.h>
-#include <linux/kobj_map.h>
 #include <linux/mutex.h>
 #include <linux/idr.h>
 #include <linux/log2.h>
@@ -29,6 +28,16 @@ 
 static DEFINE_MUTEX(block_class_lock);
 static struct kobject *block_depr;
 
+struct bdev_map {
+	struct bdev_map *next;
+	dev_t dev;
+	unsigned long range;
+	struct module *owner;
+	struct kobject *(*probe)(dev_t, int *, void *);
+	int (*lock)(dev_t, void *);
+	void *data;
+} *bdev_map[255];
+
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT		(1 << MINORBITS)
 
@@ -517,8 +526,6 @@  void unregister_blkdev(unsigned int major, const char *name)
 
 EXPORT_SYMBOL(unregister_blkdev);
 
-static struct kobj_map *bdev_map;
-
 /**
  * blk_mangle_minor - scatter minor numbers apart
  * @minor: minor number to mangle
@@ -645,16 +652,60 @@  void blk_register_region(dev_t devt, unsigned long range, struct module *module,
 			 struct kobject *(*probe)(dev_t, int *, void *),
 			 int (*lock)(dev_t, void *), void *data)
 {
-	kobj_map(bdev_map, devt, range, module, probe, lock, data);
-}
+	unsigned n = MAJOR(devt + range - 1) - MAJOR(devt) + 1;
+	unsigned index = MAJOR(devt);
+	unsigned i;
+	struct bdev_map *p;
+
+	n = min(n, 255u);
+	p = kmalloc_array(n, sizeof(struct bdev_map), GFP_KERNEL);
+	if (p == NULL)
+		return;
 
+	for (i = 0; i < n; i++, p++) {
+		p->owner = module;
+		p->probe = probe;
+		p->lock = lock;
+		p->dev = devt;
+		p->range = range;
+		p->data = data;
+	}
+
+	mutex_lock(&block_class_lock);
+	for (i = 0, p -= n; i < n; i++, p++, index++) {
+		struct bdev_map **s = &bdev_map[index % 255];
+		while (*s && (*s)->range < range)
+			s = &(*s)->next;
+		p->next = *s;
+		*s = p;
+	}
+	mutex_unlock(&block_class_lock);
+}
 EXPORT_SYMBOL(blk_register_region);
 
 void blk_unregister_region(dev_t devt, unsigned long range)
 {
-	kobj_unmap(bdev_map, devt, range);
-}
+	unsigned n = MAJOR(devt + range - 1) - MAJOR(devt) + 1;
+	unsigned index = MAJOR(devt);
+	unsigned i;
+	struct bdev_map *found = NULL;
 
+	mutex_lock(&block_class_lock);
+	for (i = 0; i < min(n, 255u); i++, index++) {
+		struct bdev_map **s;
+		for (s = &bdev_map[index % 255]; *s; s = &(*s)->next) {
+			struct bdev_map *p = *s;
+			if (p->dev == devt && p->range == range) {
+				*s = p->next;
+				if (!found)
+					found = p;
+				break;
+			}
+		}
+	}
+	mutex_unlock(&block_class_lock);
+	kfree(found);
+}
 EXPORT_SYMBOL(blk_unregister_region);
 
 static struct kobject *exact_match(dev_t devt, int *partno, void *data)
@@ -976,6 +1027,47 @@  static ssize_t disk_badblocks_store(struct device *dev,
 	return badblocks_store(disk->bb, page, len, 0);
 }
 
+static struct gendisk *lookup_gendisk(dev_t dev, int *partno)
+{
+	struct kobject *kobj;
+	struct bdev_map *p;
+	unsigned long best = ~0UL;
+
+retry:
+	mutex_lock(&block_class_lock);
+	for (p = bdev_map[MAJOR(dev) % 255]; p; p = p->next) {
+		struct kobject *(*probe)(dev_t, int *, void *);
+		struct module *owner;
+		void *data;
+
+		if (p->dev > dev || p->dev + p->range - 1 < dev)
+			continue;
+		if (p->range - 1 >= best)
+			break;
+		if (!try_module_get(p->owner))
+			continue;
+		owner = p->owner;
+		data = p->data;
+		probe = p->probe;
+		best = p->range - 1;
+		*partno = dev - p->dev;
+		if (p->lock && p->lock(dev, data) < 0) {
+			module_put(owner);
+			continue;
+		}
+		mutex_unlock(&block_class_lock);
+		kobj = probe(dev, partno, data);
+		/* Currently ->owner protects _only_ ->probe() itself. */
+		module_put(owner);
+		if (kobj)
+			return dev_to_disk(kobj_to_dev(kobj));
+		goto retry;
+	}
+	mutex_unlock(&block_class_lock);
+	return NULL;
+}
+
+
 /**
  * get_gendisk - get partitioning information for a given device
  * @devt: device to get partitioning information for
@@ -993,11 +1085,7 @@  struct gendisk *get_gendisk(dev_t devt, int *partno)
 	might_sleep();
 
 	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
-		struct kobject *kobj;
-
-		kobj = kobj_lookup(bdev_map, devt, partno);
-		if (kobj)
-			disk = dev_to_disk(kobj_to_dev(kobj));
+		disk = lookup_gendisk(devt, partno);
 	} else {
 		struct hd_struct *part;
 
@@ -1210,6 +1298,22 @@  static struct kobject *base_probe(dev_t devt, int *partno, void *data)
 	return NULL;
 }
 
+static void bdev_map_init(void)
+{
+	struct bdev_map *base;
+	int i;
+
+	base = kzalloc(sizeof(*base), GFP_KERNEL);
+	if (!base)
+		panic("cannot allocate bdev_map");
+
+	base->dev = 1;
+	base->range = ~0 ;
+	base->probe = base_probe;
+	for (i = 0; i < 255; i++)
+		bdev_map[i] = base;
+}
+
 static int __init genhd_device_init(void)
 {
 	int error;
@@ -1218,7 +1322,7 @@  static int __init genhd_device_init(void)
 	error = class_register(&block_class);
 	if (unlikely(error))
 		return error;
-	bdev_map = kobj_map_init(base_probe, &block_class_lock);
+	bdev_map_init();
 	blk_dev_init();
 
 	register_blkdev(BLOCK_EXT_MAJOR, "blkext");