diff mbox

2.6.28-git8: tg3 doesn't work due to firmware not loading (-git7 is ok)

Message ID 1231678799.25018.195.camel@macbook.infradead.org
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

David Woodhouse Jan. 11, 2009, 12:59 p.m. UTC
On Sun, 2009-01-11 at 12:25 +0000, David Woodhouse wrote:
> I'll take a look and see if I can remedy that. Then we wouldn't _need_
> the FIRMWARE_IN_KERNEL option.

How about this? If it fails to load the firmware from userspace during
the initialisation, it'll try again later in tg3_open(). 

I _think_ that's fine, because we don't do anything else in the early
initialisation which requires the firmware to be loaded.

So if you build with CONFIG_TIGON3=y, CONFIG_FIRMWARE_IN_KERNEL=n, you
should see it fail to load the firmware at boot, but then it should load
it successfully when you bring the device up.

Untested-but-otherwise-Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

Comments

Alessandro Suardi Jan. 11, 2009, 4:42 p.m. UTC | #1
On Sun, Jan 11, 2009 at 1:59 PM, David Woodhouse <dwmw2@infradead.org> wrote:
> On Sun, 2009-01-11 at 12:25 +0000, David Woodhouse wrote:
>> I'll take a look and see if I can remedy that. Then we wouldn't _need_
>> the FIRMWARE_IN_KERNEL option.
>
> How about this? If it fails to load the firmware from userspace during
> the initialisation, it'll try again later in tg3_open().
>
> I _think_ that's fine, because we don't do anything else in the early
> initialisation which requires the firmware to be loaded.
>
> So if you build with CONFIG_TIGON3=y, CONFIG_FIRMWARE_IN_KERNEL=n, you
> should see it fail to load the firmware at boot, but then it should load
> it successfully when you bring the device up.
>
> Untested-but-otherwise-Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
>
> diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
> index 5e2dbae..f99218c 100644
> --- a/drivers/net/tg3.c
> +++ b/drivers/net/tg3.c
> @@ -7535,11 +7535,45 @@ static int tg3_test_msi(struct tg3 *tp)
>        return err;
>  }
>
> +static int tg3_request_firmware(struct tg3 *tp)
> +{
> +       const __be32 *fw_data;
> +
> +       if (request_firmware(&tp->fw, tp->fw_needed, &tp->pdev->dev)) {
> +               printk(KERN_ERR "tg3: Failed to load firmware \"%s\"\n",
> +                      tp->fw_needed);
> +               return -ENOENT;
> +       }
> +
> +       fw_data = (void *)tp->fw->data;
> +
> +       /* Firmware blob starts with version numbers, followed by
> +          start address and _full_ length including BSS sections
> +          (which must be longer than the actual data, of course */
> +
> +       tp->fw_len = be32_to_cpu(fw_data[2]);   /* includes bss */
> +       if (tp->fw_len < (tp->fw->size - 12)) {
> +               printk(KERN_ERR "tg3: bogus length %d in \"%s\"\n",
> +                      tp->fw_len, tp->fw_needed);
> +               return -EINVAL;
> +       }
> +
> +       /* We no longer need firmware; we have it. */
> +       tp->fw_needed = NULL;
> +       return 0;
> +}
> +
>  static int tg3_open(struct net_device *dev)
>  {
>        struct tg3 *tp = netdev_priv(dev);
>        int err;
>
> +       if (tp->fw_needed) {
> +               err = tg3_request_firmware(tp);
> +               if (err)
> +                       return err;
> +       }
> +
>        netif_carrier_off(tp->dev);
>
>        err = tg3_set_power_state(tp, PCI_D0);
> @@ -12934,7 +12968,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
>        struct net_device *dev;
>        struct tg3 *tp;
>        int err, pm_cap;
> -       const char *fw_name = NULL;
>        char str[40];
>        u64 dma_mask, persist_dma_mask;
>
> @@ -13091,7 +13124,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
>        tg3_init_bufmgr_config(tp);
>
>        if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0)
> -               fw_name = FIRMWARE_TG3;
> +               tp->fw_needed = FIRMWARE_TG3;
>
>        if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
>                tp->tg3_flags2 |= TG3_FLG2_TSO_CAPABLE;
> @@ -13107,34 +13140,19 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
>        }
>        if (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) {
>                if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)
> -                       fw_name = FIRMWARE_TG3TSO5;
> +                       tp->fw_needed = FIRMWARE_TG3TSO5;
>                else
> -                       fw_name = FIRMWARE_TG3TSO;
> +                       tp->fw_needed = FIRMWARE_TG3TSO;
>        }
>
> -       if (fw_name) {
> -               const __be32 *fw_data;
> -
> -               err = request_firmware(&tp->fw, fw_name, &tp->pdev->dev);
> -               if (err) {
> -                       printk(KERN_ERR "tg3: Failed to load firmware \"%s\"\n",
> -                              fw_name);
> -                       goto err_out_iounmap;
> -               }
> -
> -               fw_data = (void *)tp->fw->data;
> -
> -               /* Firmware blob starts with version numbers, followed by
> -                  start address and _full_ length including BSS sections
> -                  (which must be longer than the actual data, of course */
> -
> -               tp->fw_len = be32_to_cpu(fw_data[2]);   /* includes bss */
> -               if (tp->fw_len < (tp->fw->size - 12)) {
> -                       printk(KERN_ERR "tg3: bogus length %d in \"%s\"\n",
> -                              tp->fw_len, fw_name);
> -                       err = -EINVAL;
> +       if (tp->fw_needed) {
> +               err = tg3_request_firmware(tp);
> +               /* Failure to load firmware at this stage is not fatal; we'll
> +                  try again in tg3_open(). So if you have the driver built
> +                  into the kernel, you can still get the firmware loaded
> +                  after userspace is running, when the device comes up. */
> +               if (err != -ENOENT)
>                        goto err_out_fw;
> -               }
>        }
>
>        /* TSO is on by default on chips that support hardware TSO.

Patches cleanly but doesn't build in 2.6.29-rc1:

  CC      drivers/leds/led-core.o
  CC      drivers/leds/led-class.o
  LD      drivers/leds/built-in.o
  CC      drivers/net/tg3.o
drivers/net/tg3.c: In function 'tg3_request_firmware':
drivers/net/tg3.c:7542: error: 'struct tg3' has no member named 'fw_needed'
drivers/net/tg3.c:7544: error: 'struct tg3' has no member named 'fw_needed'
drivers/net/tg3.c:7557: error: 'struct tg3' has no member named 'fw_needed'
drivers/net/tg3.c:7562: error: 'struct tg3' has no member named 'fw_needed'
drivers/net/tg3.c: In function 'tg3_open':
drivers/net/tg3.c:7571: error: 'struct tg3' has no member named 'fw_needed'
drivers/net/tg3.c: In function 'tg3_init_one':
drivers/net/tg3.c:13127: error: 'struct tg3' has no member named 'fw_needed'
drivers/net/tg3.c:13143: error: 'struct tg3' has no member named 'fw_needed'
drivers/net/tg3.c:13145: error: 'struct tg3' has no member named 'fw_needed'
drivers/net/tg3.c:13148: error: 'struct tg3' has no member named 'fw_needed'
make[2]: *** [drivers/net/tg3.o] Error 1
make[1]: *** [drivers/net] Error 2
make: *** [drivers] Error 2
[asuardi@sandman linux-2.6.29-rc1]$


Indeed, struct tg3 in tg3.h doesn't have fw_needed here...


--alessandro

 "Sun keeps rising in the west / I keep on waking fully confused"

   (The Replacements, "Within Your Reach")
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Valdis Kl ē tnieks Jan. 11, 2009, 4:48 p.m. UTC | #2
On Sun, 11 Jan 2009 12:59:59 GMT, David Woodhouse said:
> On Sun, 2009-01-11 at 12:25 +0000, David Woodhouse wrote:
> > I'll take a look and see if I can remedy that. Then we wouldn't _need_
> > the FIRMWARE_IN_KERNEL option.
> 
> How about this? If it fails to load the firmware from userspace during
> the initialisation, it'll try again later in tg3_open(). 
> 
> I _think_ that's fine, because we don't do anything else in the early
> initialisation which requires the firmware to be loaded.
> 
> So if you build with CONFIG_TIGON3=y, CONFIG_FIRMWARE_IN_KERNEL=n, you
> should see it fail to load the firmware at boot, but then it should load
> it successfully when you bring the device up.
> 
> Untested-but-otherwise-Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>

I'll see if I can give it a test drive sometime in the next 24 hours or so.

One unanswered question:  What do we expect the system to do if they have this
patch, TIGON3=y, FIRMWARE_IN_KERNEL=n, and configure a netconsole for boot
messages?  I'm *hoping* the answer is "the netconsole doesn't come up at boot,
but can be re-enabled via the /sys/kernel/config/netconsole interface after
you've done an 'ifconfig eth0 up' or similar, or do a 'modprobe netconsole'.

Those seem like reasonable semantics to me - anybody got a different opinion?
David Woodhouse Jan. 11, 2009, 4:56 p.m. UTC | #3
On Sun, 2009-01-11 at 11:48 -0500, Valdis.Kletnieks@vt.edu wrote:
> One unanswered question:  What do we expect the system to do if they have this
> patch, TIGON3=y, FIRMWARE_IN_KERNEL=n, and configure a netconsole for boot
> messages?  I'm *hoping* the answer is "the netconsole doesn't come up at boot,
> but can be re-enabled via the /sys/kernel/config/netconsole interface after
> you've done an 'ifconfig eth0 up' or similar, or do a 'modprobe netconsole'.
> 
> Those seem like reasonable semantics to me - anybody got a different opinion?

If netconsole follows the normal ->open() path, that sounds like what'll
happen.
David Miller Jan. 11, 2009, 9:41 p.m. UTC | #4
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 11 Jan 2009 12:59:59 +0000

> So if you build with CONFIG_TIGON3=y, CONFIG_FIRMWARE_IN_KERNEL=n, you
> should see it fail to load the firmware at boot, but then it should load
> it successfully when you bring the device up.

And you won't be able to mount an NFS root filesystem.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Jan. 11, 2009, 9:49 p.m. UTC | #5
From: Valdis.Kletnieks@vt.edu
Date: Sun, 11 Jan 2009 11:48:22 -0500

> One unanswered question:  What do we expect the system to do if they have this
> patch, TIGON3=y, FIRMWARE_IN_KERNEL=n, and configure a netconsole for boot
> messages?  I'm *hoping* the answer is "the netconsole doesn't come up at boot,
> but can be re-enabled via the /sys/kernel/config/netconsole interface after
> you've done an 'ifconfig eth0 up' or similar, or do a 'modprobe netconsole'.
> 
> Those seem like reasonable semantics to me - anybody got a different opinion?

Even better, how about nfsroot?  There is no "later", either you
can mount to root filesystem or you fail.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Valdis Kl ē tnieks Jan. 12, 2009, 12:10 a.m. UTC | #6
On Sun, 11 Jan 2009 12:59:59 GMT, David Woodhouse said:
> On Sun, 2009-01-11 at 12:25 +0000, David Woodhouse wrote:
> > I'll take a look and see if I can remedy that. Then we wouldn't _need_
> > the FIRMWARE_IN_KERNEL option.
> 
> How about this? If it fails to load the firmware from userspace during
> the initialisation, it'll try again later in tg3_open(). 
> 
> I _think_ that's fine, because we don't do anything else in the early
> initialisation which requires the firmware to be loaded.
> 
> So if you build with CONFIG_TIGON3=y, CONFIG_FIRMWARE_IN_KERNEL=n, you
> should see it fail to load the firmware at boot, but then it should load
> it successfully when you bring the device up.
> 
> Untested-but-otherwise-Signed-off-by: David Woodhouse <David.Woodhouse@intel.
com>
> 
> diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c

So I took it for a test drive, using my currently-working .config, which has

CONFIG_TIGON3=y
CONFIG_FIRMWARE_IN_KERNEL=y

In the dmesg I see during early bootup:

[    0.694230] loop: module loaded
[    0.694249] tg3.c:v3.97 (December 10, 2008)
[    0.694261] vendor=8086 device=27d4
[    0.694265] tg3 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
[    0.694274] tg3 0000:09:00.0: setting latency timer to 64
[    0.696087] tg3 0000:09:00.0: wake-up capability disabled by ACPI
[    0.696094] tg3 0000:09:00.0: PME# disabled
[    0.702276] tg3 0000:09:00.0: firmware: using built-in firmware tigon/tg3_tso.bin
[    0.702287] vendor=8086 device=27d4
[    0.702288] tg3 0000:09:00.0: PCI INT A disabled
[    0.702512] console [netcon0] enabled
[    0.702515] netconsole: network logging started
[    0.702575] Driver 'sd' needs updating - please use bus_type methods

but once we get to userspace, 'ifconfig' or 'ip link show' have *zero*
about an eth0 device.

For comparison, the dmesg if I revert your patch:

[    0.696638] loop: module loaded
[    0.696658] tg3.c:v3.97 (December 10, 2008)
[    0.696670] vendor=8086 device=27d4
[    0.696674] tg3 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
[    0.696683] tg3 0000:09:00.0: setting latency timer to 64
[    0.698063] tg3 0000:09:00.0: wake-up capability disabled by ACPI
[    0.698070] tg3 0000:09:00.0: PME# disabled
[    0.704276] tg3 0000:09:00.0: firmware: using built-in firmware tigon/tg3_tso.bin
[    0.704445] eth0: Tigon3 [partno(BCM5752KFBG) rev 6002] (PCI Express) MAC address 00:15:c5:c8:33:4e
[    0.704448] eth0: attached PHY is 5752 (10/100/1000Base-T Ethernet) (WireSpeed[1])
[    0.704451] eth0: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] TSOcap[1]
[    0.704453] eth0: dma_rwctrl[76180000] dma_mask[64-bit]
[    0.704653] console [netcon0] enabled
[    0.704656] netconsole: network logging started
[    0.704718] Driver 'sd' needs updating - please use bus_type methods

So it looks like the patch is failing to finish initialization of the card.
Damned if *I* can see what's breaking it, the conversion to use a helper
function tg3_request_firmware seems sane enough....
Valdis Kl ē tnieks Jan. 12, 2009, 1:39 a.m. UTC | #7
On Sun, 11 Jan 2009 19:10:36 EST, Valdis.Kletnieks@vt.edu said:

> In the dmesg I see during early bootup:
> 
> [    0.694230] loop: module loaded
> [    0.694249] tg3.c:v3.97 (December 10, 2008)
> [    0.694261] vendor=8086 device=27d4
> [    0.694265] tg3 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
> [    0.694274] tg3 0000:09:00.0: setting latency timer to 64
> [    0.696087] tg3 0000:09:00.0: wake-up capability disabled by ACPI
> [    0.696094] tg3 0000:09:00.0: PME# disabled
> [    0.702276] tg3 0000:09:00.0: firmware: using built-in firmware tigon/tg3_tso.bin
> [    0.702287] vendor=8086 device=27d4
> [    0.702288] tg3 0000:09:00.0: PCI INT A disabled
> [    0.702512] console [netcon0] enabled
> [    0.702515] netconsole: network logging started
> [    0.702575] Driver 'sd' needs updating - please use bus_type methods
> 
> but once we get to userspace, 'ifconfig' or 'ip link show' have *zero*
> about an eth0 device.
> 
> For comparison, the dmesg if I revert your patch:
> 
> [    0.696638] loop: module loaded
> [    0.696658] tg3.c:v3.97 (December 10, 2008)
> [    0.696670] vendor=8086 device=27d4
> [    0.696674] tg3 0000:09:00.0: PCI INT A -> GSI 18 (level, low) -> IRQ 18
> [    0.696683] tg3 0000:09:00.0: setting latency timer to 64
> [    0.698063] tg3 0000:09:00.0: wake-up capability disabled by ACPI
> [    0.698070] tg3 0000:09:00.0: PME# disabled
> [    0.704276] tg3 0000:09:00.0: firmware: using built-in firmware tigon/tg3_tso.bin
> [    0.704445] eth0: Tigon3 [partno(BCM5752KFBG) rev 6002] (PCI Express) MAC address 00:15:c5:c8:33:4e
> [    0.704448] eth0: attached PHY is 5752 (10/100/1000Base-T Ethernet) (WireSpeed[1])
> [    0.704451] eth0: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[0] TSOcap[1]
> [    0.704453] eth0: dma_rwctrl[76180000] dma_mask[64-bit]
> [    0.704653] console [netcon0] enabled
> [    0.704656] netconsole: network logging started
> [    0.704718] Driver 'sd' needs updating - please use bus_type methods
> 
> So it looks like the patch is failing to finish initialization of the card.
> Damned if *I* can see what's breaking it, the conversion to use a helper
> function tg3_request_firmware seems sane enough....

Damn.  I wonder if netconsole's initialization is turning around and stomping
on everything?  This looks suspicious:

static int tg3_open(struct net_device *dev)
 {
        struct tg3 *tp = netdev_priv(dev);
        int err;
 
+       if (tp->fw_needed) {

Do we know for sure that tp-> struct is the same one we set up back in
tg3_request_firmware, *and* that tg3_open() doesn't get called before
tg3_init_one() (which would result in an uninitialized fw_needed)?
diff mbox

Patch

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 5e2dbae..f99218c 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -7535,11 +7535,45 @@  static int tg3_test_msi(struct tg3 *tp)
 	return err;
 }
 
+static int tg3_request_firmware(struct tg3 *tp)
+{
+	const __be32 *fw_data;
+
+	if (request_firmware(&tp->fw, tp->fw_needed, &tp->pdev->dev)) {
+		printk(KERN_ERR "tg3: Failed to load firmware \"%s\"\n",
+		       tp->fw_needed);
+		return -ENOENT;
+	}
+
+	fw_data = (void *)tp->fw->data;
+
+	/* Firmware blob starts with version numbers, followed by
+	   start address and _full_ length including BSS sections
+	   (which must be longer than the actual data, of course */
+
+	tp->fw_len = be32_to_cpu(fw_data[2]);	/* includes bss */
+	if (tp->fw_len < (tp->fw->size - 12)) {
+		printk(KERN_ERR "tg3: bogus length %d in \"%s\"\n",
+		       tp->fw_len, tp->fw_needed);
+		return -EINVAL;
+	}
+
+	/* We no longer need firmware; we have it. */
+	tp->fw_needed = NULL;
+	return 0;
+}
+
 static int tg3_open(struct net_device *dev)
 {
 	struct tg3 *tp = netdev_priv(dev);
 	int err;
 
+	if (tp->fw_needed) {
+		err = tg3_request_firmware(tp);
+		if (err)
+			return err;
+	}
+
 	netif_carrier_off(tp->dev);
 
 	err = tg3_set_power_state(tp, PCI_D0);
@@ -12934,7 +12968,6 @@  static int __devinit tg3_init_one(struct pci_dev *pdev,
 	struct net_device *dev;
 	struct tg3 *tp;
 	int err, pm_cap;
-	const char *fw_name = NULL;
 	char str[40];
 	u64 dma_mask, persist_dma_mask;
 
@@ -13091,7 +13124,7 @@  static int __devinit tg3_init_one(struct pci_dev *pdev,
 	tg3_init_bufmgr_config(tp);
 
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0)
-		fw_name = FIRMWARE_TG3;
+		tp->fw_needed = FIRMWARE_TG3;
 
 	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
 		tp->tg3_flags2 |= TG3_FLG2_TSO_CAPABLE;
@@ -13107,34 +13140,19 @@  static int __devinit tg3_init_one(struct pci_dev *pdev,
 	}
 	if (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) {
 		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)
-			fw_name = FIRMWARE_TG3TSO5;
+			tp->fw_needed = FIRMWARE_TG3TSO5;
 		else
-			fw_name = FIRMWARE_TG3TSO;
+			tp->fw_needed = FIRMWARE_TG3TSO;
 	}
 
-	if (fw_name) {
-		const __be32 *fw_data;
-
-		err = request_firmware(&tp->fw, fw_name, &tp->pdev->dev);
-		if (err) {
-			printk(KERN_ERR "tg3: Failed to load firmware \"%s\"\n",
-			       fw_name);
-			goto err_out_iounmap;
-		}
-
-		fw_data = (void *)tp->fw->data;
-
-		/* Firmware blob starts with version numbers, followed by
-		   start address and _full_ length including BSS sections
-		   (which must be longer than the actual data, of course */
-
-		tp->fw_len = be32_to_cpu(fw_data[2]);	/* includes bss */
-		if (tp->fw_len < (tp->fw->size - 12)) {
-			printk(KERN_ERR "tg3: bogus length %d in \"%s\"\n",
-			       tp->fw_len, fw_name);
-			err = -EINVAL;
+	if (tp->fw_needed) {
+		err = tg3_request_firmware(tp);
+		/* Failure to load firmware at this stage is not fatal; we'll
+		   try again in tg3_open(). So if you have the driver built
+		   into the kernel, you can still get the firmware loaded
+		   after userspace is running, when the device comes up. */
+		if (err != -ENOENT)
 			goto err_out_fw;
-		}
 	}
 
 	/* TSO is on by default on chips that support hardware TSO.