Patchwork [05/12] PCI: Require CAP_COMPROMISE_KERNEL for PCI BAR access

login
register
mail settings
Submitter Kyle McMartin
Date March 27, 2013, 3:08 p.m.
Message ID <20130327150807.GC14004@redacted.bos.redhat.com>
Download mbox | patch
Permalink /patch/231725/
State Not Applicable
Headers show

Comments

Kyle McMartin - March 27, 2013, 3:08 p.m.
On Wed, Mar 27, 2013 at 11:03:26AM -0400, Josh Boyer wrote:
> On Mon, Mar 18, 2013 at 5:32 PM, Matthew Garrett
> <matthew.garrett@nebula.com> wrote:
> > Any hardware that can potentially generate DMA has to be locked down from
> > userspace in order to avoid it being possible for an attacker to cause
> > arbitrary kernel behaviour. Default to paranoid - in future we can
> > potentially relax this for sufficiently IOMMU-isolated devices.
> >
> > Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com>
> 
> As noted here:
> 
> https://bugzilla.redhat.com/show_bug.cgi?id=908888
> 
> this breaks pci passthru with QEMU.  The suggestion in the bug is to move
> the check from read/write to open, but sysfs makes that somewhat
> difficult.  The open code is part of the core sysfs functionality shared
> with the majority of sysfs files, so adding a check there would restrict
> things that clearly don't need to be restricted.
> 
> Kyle had the idea to add a cap field to the attribute structure, and do
> a capable check if that is set.  That would allow for a more generic
> usage of capabilities in sysfs code, at the cost of slightly increasing
> the structure size and open path.  That seems somewhat promising if we
> stick with capabilities.
> 
> I would love to just squarely blame capabilities for causing this, but we
> can't just replace it with an efi_enabled(EFI_SECURE_BOOT) check because
> of the sysfs open case.  I'm not sure there are great answers here.
> 

Yeah, that was something like this (I don't even remember which Fedora
kernel version this was against.)

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josh Boyer - March 28, 2013, 12:46 p.m.
On Wed, Mar 27, 2013 at 11:08 AM, Kyle McMartin <kmcmarti@redhat.com> wrote:
> On Wed, Mar 27, 2013 at 11:03:26AM -0400, Josh Boyer wrote:
>> On Mon, Mar 18, 2013 at 5:32 PM, Matthew Garrett
>> <matthew.garrett@nebula.com> wrote:
>> > Any hardware that can potentially generate DMA has to be locked down from
>> > userspace in order to avoid it being possible for an attacker to cause
>> > arbitrary kernel behaviour. Default to paranoid - in future we can
>> > potentially relax this for sufficiently IOMMU-isolated devices.
>> >
>> > Signed-off-by: Matthew Garrett <matthew.garrett@nebula.com>
>>
>> As noted here:
>>
>> https://bugzilla.redhat.com/show_bug.cgi?id=908888
>>
>> this breaks pci passthru with QEMU.  The suggestion in the bug is to move
>> the check from read/write to open, but sysfs makes that somewhat
>> difficult.  The open code is part of the core sysfs functionality shared
>> with the majority of sysfs files, so adding a check there would restrict
>> things that clearly don't need to be restricted.
>>
>> Kyle had the idea to add a cap field to the attribute structure, and do
>> a capable check if that is set.  That would allow for a more generic
>> usage of capabilities in sysfs code, at the cost of slightly increasing
>> the structure size and open path.  That seems somewhat promising if we
>> stick with capabilities.
>>
>> I would love to just squarely blame capabilities for causing this, but we
>> can't just replace it with an efi_enabled(EFI_SECURE_BOOT) check because
>> of the sysfs open case.  I'm not sure there are great answers here.
>>
>
> Yeah, that was something like this (I don't even remember which Fedora
> kernel version this was against.)

Mostly an FYI for the peanut gallery, but we noticed moving the cap check
to open breaks lspci being run by an unprivileged user.  It also doesn't
fix pci passthrough because QEMU opens the PCI resource files by itself
after it's already dropped all caps.

More thinking required.

josh
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -546,9 +546,6 @@  pci_write_config(struct file* filp, struct kobject *kobj,
 	loff_t init_off = off;
 	u8 *data = (u8*) buf;
 
-	if (!capable(CAP_COMPROMISE_KERNEL))
-		return -EPERM;
-
 	if (off > dev->cfg_size)
 		return 0;
 	if (off + count > dev->cfg_size) {
@@ -772,6 +769,7 @@  void pci_create_legacy_files(struct pci_bus *b)
 	b->legacy_io->attr.name = "legacy_io";
 	b->legacy_io->size = 0xffff;
 	b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+	b->legacy_io->attr.cap = CAP_COMPROMISE_KERNEL;
 	b->legacy_io->read = pci_read_legacy_io;
 	b->legacy_io->write = pci_write_legacy_io;
 	b->legacy_io->mmap = pci_mmap_legacy_io;
@@ -786,6 +784,7 @@  void pci_create_legacy_files(struct pci_bus *b)
 	b->legacy_mem->attr.name = "legacy_mem";
 	b->legacy_mem->size = 1024*1024;
 	b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+	b->legacy_io->attr.cap = CAP_COMPROMISE_KERNEL;
 	b->legacy_mem->mmap = pci_mmap_legacy_mem;
 	pci_adjust_legacy_attr(b, pci_mmap_mem);
 	error = device_create_bin_file(&b->dev, b->legacy_mem);
@@ -855,9 +854,6 @@  pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 	resource_size_t start, end;
 	int i;
 
-	if (!capable(CAP_COMPROMISE_KERNEL))
-		return -EPERM;
-
 	for (i = 0; i < PCI_ROM_RESOURCE; i++)
 		if (res == &pdev->resource[i])
 			break;
@@ -965,9 +961,6 @@  pci_write_resource_io(struct file *filp, struct kobject *kobj,
 		      struct bin_attribute *attr, char *buf,
 		      loff_t off, size_t count)
 {
-	if (!capable(CAP_COMPROMISE_KERNEL))
-		return -EPERM;
-
 	return pci_resource_io(filp, kobj, attr, buf, off, count, true);
 }
 
@@ -1027,6 +1020,7 @@  static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
 		}
 		res_attr->attr.name = res_attr_name;
 		res_attr->attr.mode = S_IRUSR | S_IWUSR;
+		res_attr->attr.cap = CAP_COMPROMISE_KERNEL;
 		res_attr->size = pci_resource_len(pdev, num);
 		res_attr->private = &pdev->resource[num];
 		retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
@@ -1142,6 +1136,7 @@  static struct bin_attribute pci_config_attr = {
 	.attr =	{
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
+		.cap  = CAP_COMPROMISE_KERNEL,
 	},
 	.size = PCI_CFG_SPACE_SIZE,
 	.read = pci_read_config,
@@ -1152,6 +1147,7 @@  static struct bin_attribute pcie_config_attr = {
 	.attr =	{
 		.name = "config",
 		.mode = S_IRUGO | S_IWUSR,
+		.cap  = CAP_COMPROMISE_KERNEL,
 	},
 	.size = PCI_CFG_SPACE_EXP_SIZE,
 	.read = pci_read_config,
@@ -1201,6 +1197,7 @@  static int pci_create_capabilities_sysfs(struct pci_dev *dev)
 		attr->size = dev->vpd->len;
 		attr->attr.name = "vpd";
 		attr->attr.mode = S_IRUSR | S_IWUSR;
+		attr->attr.cap = CAP_COMPROMISE_KERNEL;
 		attr->read = read_vpd_attr;
 		attr->write = write_vpd_attr;
 		retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 614b2b5..e40a725 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -402,6 +402,10 @@  static int open(struct inode * inode, struct file * file)
 	if (!sysfs_get_active(attr_sd))
 		return -ENODEV;
 
+	error = -EACCES;
+	if (attr->attr.cap && !capable(attr->attr.cap))
+		goto err_out;
+
 	error = -EACCES;
 	if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
 		goto err_out;
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 381f06d..0cf0034 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -26,6 +26,7 @@  enum kobj_ns_type;
 struct attribute {
 	const char		*name;
 	umode_t			mode;
+	int			cap;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	bool			ignore_lockdep:1;
 	struct lock_class_key	*key;