
[PATCHv2] vgaarb: Add module param to allow for choosing the boot VGA device

Message ID 8498ea9f-2ba9-b5da-7dc4-1588363f1b62@absolutedigital.net
State New

Commit Message

Cal Peake July 4, 2022, 9:12 p.m. UTC
Add module parameter 'bootdev' to the VGA arbiter to allow the user
to choose which PCI device should be selected over any others as the
boot VGA device.

When using a multi-GPU system with one or more GPUs being used in
conjunction with VFIO for passthrough to a virtual machine, if the
VGA arbiter settles on a passthrough GPU as the boot VGA device,
once the VFIO PCI driver claims that GPU, all display output is lost
and the result is blank screens and no VT access.

Signed-off-by: Cal Peake <cp@absolutedigital.net>
---
 .../admin-guide/kernel-parameters.txt         |  7 ++++
 drivers/pci/vgaarb.c                          | 40 +++++++++++++++++++
 2 files changed, 47 insertions(+)

Comments

Bjorn Helgaas July 4, 2022, 9:38 p.m. UTC | #1
[+cc Alex, Cornelia, kvm]

On Mon, Jul 04, 2022 at 05:12:44PM -0400, Cal Peake wrote:
> Add module parameter 'bootdev' to the VGA arbiter to allow the user
> to choose which PCI device should be selected over any others as the
> boot VGA device.
> 
> When using a multi-GPU system with one or more GPUs being used in
> conjunction with VFIO for passthrough to a virtual machine, if the
> VGA arbiter settles on a passthrough GPU as the boot VGA device,
> once the VFIO PCI driver claims that GPU, all display output is lost
> and the result is blank screens and no VT access.

I cc'd KVM folks in case they have anything to add here because I'm
not a VFIO passthrough expert.

It sounds like the problem occurs when the VFIO driver claims the GPU.
I assume that happens after boot, when setting up for the virtual
machine?  If so, is there a way to avoid the problem at run-time so
the admin doesn't have to decide at boot-time which GPU will be passed
through to a VM?  Is it possible or desirable to pass through GPU A to
VM A, then after VM A exits, pass through GPU B to VM B?

> Signed-off-by: Cal Peake <cp@absolutedigital.net>
> ---
>  .../admin-guide/kernel-parameters.txt         |  7 ++++
>  drivers/pci/vgaarb.c                          | 40 +++++++++++++++++++
>  2 files changed, 47 insertions(+)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 2522b11e593f..21ac87f4a8a9 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6518,6 +6518,13 @@
>  			This is actually a boot loader parameter; the value is
>  			passed to the kernel using a special protocol.
>  
> +	vgaarb.bootdev=	[PCI] Specify the PCI ID (e.g. 0e:00.0) of the
> +			device to use as the boot VGA device, overriding
> +			the heuristic used to normally determine which
> +			of the eligible VGA devices to use. If the device
> +			specified is not valid or not eligible, then we
> +			fallback to the heuristic.
> +
>  	vm_debug[=options]	[KNL] Available with CONFIG_DEBUG_VM=y.
>  			May slow down system boot speed, especially when
>  			enabled on systems with a large amount of memory.
> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> index f80b6ec88dc3..d3689b7dc63d 100644
> --- a/drivers/pci/vgaarb.c
> +++ b/drivers/pci/vgaarb.c
> @@ -35,6 +35,34 @@
>  
>  #include <linux/vgaarb.h>
>  
> +static char *bootdev __initdata;
> +module_param(bootdev, charp, 0);
> +MODULE_PARM_DESC(bootdev, "Force boot device to the specified PCI ID");
> +
> +/*
> + * Initialize to the last possible ID to have things work as normal
> + * when no 'bootdev' option is supplied. We especially do not want
> + * this to be zero (0) since that is a valid PCI ID (00:00.0).
> + */
> +static u16 bootdev_id = 0xffff;
> +
> +static void __init parse_bootdev(char *input)
> +{
> +	unsigned int bus, dev, func;
> +	int ret;
> +
> +	if (input == NULL)
> +		return;
> +
> +	ret = sscanf(input, "%x:%x.%x", &bus, &dev, &func);
> +	if (ret != 3) {
> +		pr_warn("Improperly formatted PCI ID: %s\n", input);
> +		return;
> +	}
> +
> +	bootdev_id = PCI_DEVID(bus, PCI_DEVFN(dev, func));
> +}
> +
>  static void vga_arbiter_notify_clients(void);
>  /*
>   * We keep a list of all vga devices in the system to speed
> @@ -53,6 +81,7 @@ struct vga_device {
>  	bool bridge_has_one_vga;
>  	bool is_firmware_default;	/* device selected by firmware */
>  	unsigned int (*set_decode)(struct pci_dev *pdev, bool decode);
> +	bool is_chosen_one;		/* device specified on command line */
>  };
>  
>  static LIST_HEAD(vga_list);
> @@ -605,6 +634,7 @@ static bool vga_is_boot_device(struct vga_device *vgadev)
>  
>  	/*
>  	 * We select the default VGA device in this order:
> +	 *   User specified device (see module param bootdev=)
>  	 *   Firmware framebuffer (see vga_arb_select_default_device())
>  	 *   Legacy VGA device (owns VGA_RSRC_LEGACY_MASK)
>  	 *   Non-legacy integrated device (see vga_arb_select_default_device())
> @@ -612,6 +642,14 @@ static bool vga_is_boot_device(struct vga_device *vgadev)
>  	 *   Other device (see vga_arb_select_default_device())
>  	 */
>  
> +	if (boot_vga && boot_vga->is_chosen_one)
> +		return false;
> +
> +	if (bootdev_id == PCI_DEVID(pdev->bus->number, pdev->devfn)) {
> +		vgadev->is_chosen_one = true;
> +		return true;
> +	}
> +
>  	/*
>  	 * We always prefer a firmware default device, so if we've already
>  	 * found one, there's no need to consider vgadev.
> @@ -1544,6 +1582,8 @@ static int __init vga_arb_device_init(void)
>  	int rc;
>  	struct pci_dev *pdev;
>  
> +	parse_bootdev(bootdev);
> +
>  	rc = misc_register(&vga_arb_device);
>  	if (rc < 0)
>  		pr_err("error %d registering device\n", rc);
> -- 
> 2.35.3
>
Cal Peake July 4, 2022, 11:07 p.m. UTC | #2
On Mon, 4 Jul 2022, Bjorn Helgaas wrote:

> I cc'd KVM folks in case they have anything to add here because I'm
> not a VFIO passthrough expert.
> 
> It sounds like the problem occurs when the VFIO driver claims the GPU.
> I assume that happens after boot, when setting up for the virtual
> machine?

No, this is during boot, long before a VM is launched. As you can kinda 
see from these lines from early on in the boot process:

[   22.066610] amdgpu 0000:0e:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=none:owns=none
[   25.726469] vfio-pci 0000:0f:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none

The vfio-pci driver claims the device like it was a typical GPU driver, 
but since it isn't, the display output functionality of the card stops 
because part of the vfio-pci driver's job is to make sure the card is in 
an unused, preferably pristine-as-possible state for when the VM takes 
control of it.

If we go back earlier in the boot process, you'll see that second line again:

[    9.226635] vfio-pci 0000:0f:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
[    9.238385] vfio_pci: add [10de:1f06[ffffffff:ffffffff]] class 0x000000/00000000
[    9.251529] vfio_pci: add [10de:10f9[ffffffff:ffffffff]] class 0x000000/00000000
[    9.264328] vfio_pci: add [10de:1ada[ffffffff:ffffffff]] class 0x000000/00000000
[    9.277162] vfio_pci: add [10de:1adb[ffffffff:ffffffff]] class 0x000000/00000000

If that device is the one selected by the arbiter as boot device, then 
that is the point where display output stops and everything goes to black.

>  If so, is there a way to avoid the problem at run-time so the admin 
> doesn't have to decide at boot-time which GPU will be passed through to 
> a VM?

With the way that many people like me run this kind of setup, the 
passthrough GPU gets reserved at boot-time anyway with the passing of a 
line like:

vfio_pci.ids=10de:1f06,10de:10f9,10de:1ada,10de:1adb

on the kernel command-line from the bootloader. Doing a similar 
reservation for the host GPU with something like 'vgaarb.bootdev=0e:00.0' 
alongside it should be no big deal to anyone running a setup like this.

You can bind/unbind devices to the vfio-pci driver at run-time using 
sysfs[1], but as far as I can tell, there is no way to change the boot VGA 
device at run-time.

>  Is it possible or desirable to pass through GPU A to VM A, then after 
> VM A exits, pass through GPU B to VM B?

Yeah, there are many ways one can run this setup. Some run with a single 
GPU that gets passed-through and the host is headless. There's probably 
some with more than two GPUs with multiple VMs each getting their own.

The setup I'm running is pretty common: dedicated GPU for the host 
(doesn't need to be anything special, just needs to handle workstation 
duties) and a dedicated GPU for a Windows VM for gaming (something quite 
powerful for those high FPS :-)

As you can see, statically assigning the devices ahead of time is okay. 
The real problem (for me anyway) is there's no way in the UEFI/BIOS to 
tell the firmware which device should be used for boot. Sometimes it picks 
the first GPU, sometimes the second. If it picks wrong, I get an unusable 
system because the VGA arbiter deems the GPU selected by the firmware to 
be the best choice for boot VGA device.
Maxim Levitsky July 5, 2022, 11:39 a.m. UTC | #3
On Mon, 2022-07-04 at 19:07 -0400, Cal Peake wrote:
> On Mon, 4 Jul 2022, Bjorn Helgaas wrote:
> 
> > I cc'd KVM folks in case they have anything to add here because I'm
> > not a VFIO passthrough expert.
> > 
> > It sounds like the problem occurs when the VFIO driver claims the GPU.
> > I assume that happens after boot, when setting up for the virtual
> > machine?
> 
> No, this is during boot, long before a VM is launched. As you can kinda 
> see from these lines from early on in the boot process:
> 
> [   22.066610] amdgpu 0000:0e:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=none:owns=none
> [   25.726469] vfio-pci 0000:0f:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
> 
> The vfio-pci driver claims the device like it was a typical GPU driver, 
> but since it isn't, the display output functionality of the card stops 
> because part of the vfio-pci driver's job is to make sure the card is in 
> an unused, preferably pristine-as-possible state for when the VM takes 
> control of it.
> 
> If we go back earlier in the boot process, you'll see that second line again:
> 
> [    9.226635] vfio-pci 0000:0f:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
> [    9.238385] vfio_pci: add [10de:1f06[ffffffff:ffffffff]] class 0x000000/00000000
> [    9.251529] vfio_pci: add [10de:10f9[ffffffff:ffffffff]] class 0x000000/00000000
> [    9.264328] vfio_pci: add [10de:1ada[ffffffff:ffffffff]] class 0x000000/00000000
> [    9.277162] vfio_pci: add [10de:1adb[ffffffff:ffffffff]] class 0x000000/00000000
> 
> If that device is the one selected by the arbiter as boot device, then 
> that is the point where display output stops and everything goes to black.
> 
> >  If so, is there a way to avoid the problem at run-time so the admin 
> > doesn't have to decide at boot-time which GPU will be passed through to 
> > a VM?
> 
> With the way that many people like me run this kind of setup, the 
> passthrough GPU gets reserved at boot-time anyway with the passing of a 
> line like:
> 
> vfio_pci.ids=10de:1f06,10de:10f9,10de:1ada,10de:1adb
> 
> on the kernel command-line from the bootloader. Doing a similar 
> reservation for the host GPU with something like 'vgaarb.bootdev=0e:00.0' 
> alongside it should be no big deal to anyone running a setup like this.
> 
> You can bind/unbind devices to the vfio-pci driver at run-time using 
> sysfs[1], but as far as I can tell, there is no way to change the boot VGA 
> device at run-time.
> 
> >  Is it possible or desirable to pass through GPU A to VM A, then after 
> > VM A exits, pass through GPU B to VM B?
> 
> Yeah, there are many ways one can run this setup. Some run with a single 
> GPU that gets passed-through and the host is headless. There's probably 
> some with more than two GPUs with multiple VMs each getting their own.
> 
> The setup I'm running is pretty common: dedicated GPU for the host 
> (doesn't need to be anything special, just needs to handle workstation 
> duties) and a dedicated GPU for a Windows VM for gaming (something quite 
> powerful for those high FPS :-)
> 
> As you can see, statically assigning the devices ahead of time is okay. 
> The real problem (for me anyway) is there's no way in the UEFI/BIOS to 
> tell the firmware which device should be used for boot. Sometimes it picks 
> the first GPU, sometimes the second. If it picks wrong, I get an unusable 
> system because the VGA arbiter deems the GPU selected by the firmware to 
> be the best choice for boot VGA device.
> 

My 0.02 semi-unrelated cents:

On my desktop system I have two GPUs (an AMD workstation GPU and an NVIDIA
GPU), and I sometimes use each of them (or even both) with VFIO.

But regardless of VFIO, I sometimes use one and sometimes the other as my
main GPU. (I have all displays connected to each GPU; it's quite a complex
setup with a lot of cables and HDMI switches, but somehow it is actually
quite robust.)

Choosing the boot GPU would be nice to have. On my system I set it up in
such a way that the AMD GPU gets to be the boot GPU (I don't remember if I
blacklisted the nvidia driver or something for that), and I have a script
to dynamically switch them prior to starting X if, in a config file I
created, I specified that I want the nvidia GPU to be the default.

So this is a use case which doesn't involve VFIO.

Best regards,
	Maxim Levitsky
Alex Williamson July 5, 2022, 4:15 p.m. UTC | #4
On Mon, 4 Jul 2022 16:38:29 -0500
Bjorn Helgaas <helgaas@kernel.org> wrote:

> [+cc Alex, Cornelia, kvm]
> 
> On Mon, Jul 04, 2022 at 05:12:44PM -0400, Cal Peake wrote:
> > Add module parameter 'bootdev' to the VGA arbiter to allow the user
> > to choose which PCI device should be selected over any others as the
> > boot VGA device.
> > 
> > When using a multi-GPU system with one or more GPUs being used in
> > conjunction with VFIO for passthrough to a virtual machine, if the
> > VGA arbiter settles on a passthrough GPU as the boot VGA device,
> > once the VFIO PCI driver claims that GPU, all display output is lost
> > and the result is blank screens and no VT access.  
> 
> I cc'd KVM folks in case they have anything to add here because I'm
> not a VFIO passthrough expert.
> 
> It sounds like the problem occurs when the VFIO driver claims the GPU.
> I assume that happens after boot, when setting up for the virtual
> machine?  If so, is there a way to avoid the problem at run-time so
> the admin doesn't have to decide at boot-time which GPU will be passed
> through to a VM?  Is it possible or desirable to pass through GPU A to
> VM A, then after VM A exits, pass through GPU B to VM B?
> 
> > Signed-off-by: Cal Peake <cp@absolutedigital.net>
> > ---
> >  .../admin-guide/kernel-parameters.txt         |  7 ++++
> >  drivers/pci/vgaarb.c                          | 40 +++++++++++++++++++
> >  2 files changed, 47 insertions(+)
> > 
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index 2522b11e593f..21ac87f4a8a9 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -6518,6 +6518,13 @@
> >  			This is actually a boot loader parameter; the value is
> >  			passed to the kernel using a special protocol.
> >  
> > +	vgaarb.bootdev=	[PCI] Specify the PCI ID (e.g. 0e:00.0) of the
> > +			device to use as the boot VGA device, overriding
> > +			the heuristic used to normally determine which
> > +			of the eligible VGA devices to use. If the device
> > +			specified is not valid or not eligible, then we
> > +			fallback to the heuristic.
> > +
> >  	vm_debug[=options]	[KNL] Available with CONFIG_DEBUG_VM=y.
> >  			May slow down system boot speed, especially when
> >  			enabled on systems with a large amount of memory.
> > diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> > index f80b6ec88dc3..d3689b7dc63d 100644
> > --- a/drivers/pci/vgaarb.c
> > +++ b/drivers/pci/vgaarb.c
> > @@ -35,6 +35,34 @@
> >  
> >  #include <linux/vgaarb.h>
> >  
> > +static char *bootdev __initdata;
> > +module_param(bootdev, charp, 0);
> > +MODULE_PARM_DESC(bootdev, "Force boot device to the specified PCI ID");
> > +
> > +/*
> > + * Initialize to the last possible ID to have things work as normal
> > + * when no 'bootdev' option is supplied. We especially do not want
> > + * this to be zero (0) since that is a valid PCI ID (00:00.0).
> > + */
> > +static u16 bootdev_id = 0xffff;
> > +
> > +static void __init parse_bootdev(char *input)
> > +{
> > +	unsigned int bus, dev, func;
> > +	int ret;
> > +
> > +	if (input == NULL)
> > +		return;
> > +
> > +	ret = sscanf(input, "%x:%x.%x", &bus, &dev, &func);
> > +	if (ret != 3) {
> > +		pr_warn("Improperly formatted PCI ID: %s\n", input);
> > +		return;
> > +	}

See pci_dev_str_match()

> > +
> > +	bootdev_id = PCI_DEVID(bus, PCI_DEVFN(dev, func));
> > +}
> > +
> >  static void vga_arbiter_notify_clients(void);
> >  /*
> >   * We keep a list of all vga devices in the system to speed
> > @@ -53,6 +81,7 @@ struct vga_device {
> >  	bool bridge_has_one_vga;
> >  	bool is_firmware_default;	/* device selected by firmware */
> >  	unsigned int (*set_decode)(struct pci_dev *pdev, bool decode);
> > +	bool is_chosen_one;		/* device specified on command line */
> >  };
> >  
> >  static LIST_HEAD(vga_list);
> > @@ -605,6 +634,7 @@ static bool vga_is_boot_device(struct vga_device *vgadev)
> >  
> >  	/*
> >  	 * We select the default VGA device in this order:
> > +	 *   User specified device (see module param bootdev=)
> >  	 *   Firmware framebuffer (see vga_arb_select_default_device())
> >  	 *   Legacy VGA device (owns VGA_RSRC_LEGACY_MASK)
> >  	 *   Non-legacy integrated device (see vga_arb_select_default_device())
> > @@ -612,6 +642,14 @@ static bool vga_is_boot_device(struct vga_device *vgadev)
> >  	 *   Other device (see vga_arb_select_default_device())
> >  	 */
> >  
> > +	if (boot_vga && boot_vga->is_chosen_one)
> > +		return false;
> > +
> > +	if (bootdev_id == PCI_DEVID(pdev->bus->number, pdev->devfn)) {
> > +		vgadev->is_chosen_one = true;
> > +		return true;
> > +	}

This seems too simplistic, for example PCI code determines whether the
ROM is a shadow ROM at 0xc0000 based on whether it's the
vga_default_device() where that default device is set in
vga_arbiter_add_pci_device() based on the value returned by
this vga_is_boot_device() function.  A user wishing to specify the boot
VGA device doesn't magically make that device's ROM shadowed into this
location.
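
To make that concrete, the fixup in question does roughly the following
(paraphrased from memory, not the exact code, and the helper name here is
made up purely for illustration): the shadow copy at 0xc0000 is only
associated with whatever device ends up as vga_default_device().

	/* Paraphrased illustration only, not the real fixup code. */
	static void mark_shadow_rom_if_default(struct pci_dev *pdev)
	{
		struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];

		if (pdev != vga_default_device())
			return;

		/* Point the ROM resource at the legacy shadow at 0xc0000. */
		res->start = 0xC0000;
		res->end = res->start + 0x20000 - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
			     IORESOURCE_PCI_FIXED;
	}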

I also don't see how this actually enables VGA routing to the user
selected device, where we generally expect the boot device already has
this enabled.

Furthermore, what's the initialization state of the selected device, if
it has not had its option ROM executed, is it necessarily in a state to
accept VGA commands?  If we're changing the default VGA device, are we
fully uncoupling from any firmware notions of the console device?
Thanks,

Alex


> > +
> >  	/*
> >  	 * We always prefer a firmware default device, so if we've already
> >  	 * found one, there's no need to consider vgadev.
> > @@ -1544,6 +1582,8 @@ static int __init vga_arb_device_init(void)
> >  	int rc;
> >  	struct pci_dev *pdev;
> >  
> > +	parse_bootdev(bootdev);
> > +
> >  	rc = misc_register(&vga_arb_device);
> >  	if (rc < 0)
> >  		pr_err("error %d registering device\n", rc);
> > -- 
> > 2.35.3
> >   
>
Cal Peake July 5, 2022, 8:42 p.m. UTC | #5
On Tue, 5 Jul 2022, Alex Williamson wrote:

> > > +	ret = sscanf(input, "%x:%x.%x", &bus, &dev, &func);
> > > +	if (ret != 3) {
> > > +		pr_warn("Improperly formatted PCI ID: %s\n", input);
> > > +		return;
> > > +	}
> 
> See pci_dev_str_match()

Hi Alex, thanks for the feedback. I'll add this if we wind up going with 
some version of my patch.
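
Something roughly along these lines is what I have in mind -- just a
sketch, and it assumes pci_dev_str_match() were made non-static and
declared in drivers/pci/pci.h, which isn't the case today (and 'bootdev'
would have to lose its __initdata annotation):

	/*
	 * Sketch only: match the user-supplied bootdev string against a
	 * candidate device with pci_dev_str_match() instead of the
	 * hand-rolled sscanf() parse in my patch.
	 */
	static bool vga_is_chosen_device(struct pci_dev *pdev)
	{
		const char *endp;

		if (!bootdev)
			return false;

		/* pci_dev_str_match() returns 1 on a match, 0 otherwise. */
		return pci_dev_str_match(pdev, bootdev, &endp) == 1;
	}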

> > > +	if (boot_vga && boot_vga->is_chosen_one)
> > > +		return false;
> > > +
> > > +	if (bootdev_id == PCI_DEVID(pdev->bus->number, pdev->devfn)) {
> > > +		vgadev->is_chosen_one = true;
> > > +		return true;
> > > +	}
> 
> This seems too simplistic, for example PCI code determines whether the
> ROM is a shadow ROM at 0xc0000 based on whether it's the
> vga_default_device() where that default device is set in
> vga_arbiter_add_pci_device() based on the value returned by
> this vga_is_boot_device() function.  A user wishing to specify the boot
> VGA device doesn't magically make that device's ROM shadowed into this
> location.
> 

I think I understand what you're saying. We're not telling the system what 
the boot device is, it's telling us?

> I also don't see how this actually enables VGA routing to the user
> selected device, where we generally expect the boot device already has
> this enabled.
> 
> Furthermore, what's the initialization state of the selected device, if
> it has not had its option ROM executed, is it necessarily in a state to
> accept VGA commands?  If we're changing the default VGA device, are we
> fully uncoupling from any firmware notions of the console device?
> Thanks,

Unfortunately, I'm not the best qualified to answer these questions. My 
understanding is mostly surface-level until I start digging into the code.

I think the answer to most of them though might be that the UEFI firmware
initializes both cards.

During POST, I do get output on both GPUs. One gets the static BIOS text 
(Copyright AMI etc.) -- this is the one selected as boot device -- and the 
other gets the POST-code counting up.

Once the firmware hands off to the bootloader, whichever GPU has the 
active display (both GPUs go to the same display, the input source gets 
switched depending on whether I'm using the host or the VM) is where 
the bootloader output is.

When the bootloader hands off to the kernel, the boot device chosen by the 
firmware gets the kernel output. If that's the host GPU, then everything 
is fine.

If that's the VM GPU, then it gets the kernel output until the vfio-pci 
driver loads and then all output stops. Back on the host GPU, the screen 
is black until the X server spawns[1] but I get no VTs.

With my patch, telling the arbiter that the host GPU is always the boot 
device results in everything just working.

With all that said, if you feel this isn't the right way to go, do you 
have any thoughts on what would be a better path to try?

Thanks,

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2522b11e593f..21ac87f4a8a9 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6518,6 +6518,13 @@ 
 			This is actually a boot loader parameter; the value is
 			passed to the kernel using a special protocol.
 
+	vgaarb.bootdev=	[PCI] Specify the PCI ID (e.g. 0e:00.0) of the
+			device to use as the boot VGA device, overriding
+			the heuristic used to normally determine which
+			of the eligible VGA devices to use. If the device
+			specified is not valid or not eligible, then we
+			fallback to the heuristic.
+
 	vm_debug[=options]	[KNL] Available with CONFIG_DEBUG_VM=y.
 			May slow down system boot speed, especially when
 			enabled on systems with a large amount of memory.
diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
index f80b6ec88dc3..d3689b7dc63d 100644
--- a/drivers/pci/vgaarb.c
+++ b/drivers/pci/vgaarb.c
@@ -35,6 +35,34 @@ 
 
 #include <linux/vgaarb.h>
 
+static char *bootdev __initdata;
+module_param(bootdev, charp, 0);
+MODULE_PARM_DESC(bootdev, "Force boot device to the specified PCI ID");
+
+/*
+ * Initialize to the last possible ID to have things work as normal
+ * when no 'bootdev' option is supplied. We especially do not want
+ * this to be zero (0) since that is a valid PCI ID (00:00.0).
+ */
+static u16 bootdev_id = 0xffff;
+
+static void __init parse_bootdev(char *input)
+{
+	unsigned int bus, dev, func;
+	int ret;
+
+	if (input == NULL)
+		return;
+
+	ret = sscanf(input, "%x:%x.%x", &bus, &dev, &func);
+	if (ret != 3) {
+		pr_warn("Improperly formatted PCI ID: %s\n", input);
+		return;
+	}
+
+	bootdev_id = PCI_DEVID(bus, PCI_DEVFN(dev, func));
+}
+
 static void vga_arbiter_notify_clients(void);
 /*
  * We keep a list of all vga devices in the system to speed
@@ -53,6 +81,7 @@  struct vga_device {
 	bool bridge_has_one_vga;
 	bool is_firmware_default;	/* device selected by firmware */
 	unsigned int (*set_decode)(struct pci_dev *pdev, bool decode);
+	bool is_chosen_one;		/* device specified on command line */
 };
 
 static LIST_HEAD(vga_list);
@@ -605,6 +634,7 @@  static bool vga_is_boot_device(struct vga_device *vgadev)
 
 	/*
 	 * We select the default VGA device in this order:
+	 *   User specified device (see module param bootdev=)
 	 *   Firmware framebuffer (see vga_arb_select_default_device())
 	 *   Legacy VGA device (owns VGA_RSRC_LEGACY_MASK)
 	 *   Non-legacy integrated device (see vga_arb_select_default_device())
@@ -612,6 +642,14 @@  static bool vga_is_boot_device(struct vga_device *vgadev)
 	 *   Other device (see vga_arb_select_default_device())
 	 */
 
+	if (boot_vga && boot_vga->is_chosen_one)
+		return false;
+
+	if (bootdev_id == PCI_DEVID(pdev->bus->number, pdev->devfn)) {
+		vgadev->is_chosen_one = true;
+		return true;
+	}
+
 	/*
 	 * We always prefer a firmware default device, so if we've already
 	 * found one, there's no need to consider vgadev.
@@ -1544,6 +1582,8 @@  static int __init vga_arb_device_init(void)
 	int rc;
 	struct pci_dev *pdev;
 
+	parse_bootdev(bootdev);
+
 	rc = misc_register(&vga_arb_device);
 	if (rc < 0)
 		pr_err("error %d registering device\n", rc);