diff mbox series

[1/3] ALSA: hda/tegra: Skip reset on BPMP devices

Message ID 1638858770-22594-2-git-send-email-spujar@nvidia.com
State Changes Requested
Headers show
Series Fix Tegra194 HDA regression | expand

Commit Message

Sameer Pujar Dec. 7, 2021, 6:32 a.m. UTC
An HDA regression was recently reported on Tegra194-based platforms.
This happens because the "hda2codec_2x" reset does not really exist
on Tegra194, which causes a probe failure. All the HDA-based audio
tests fail at the moment. The underlying issue is exposed by
commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
response"), which now checks the return code of the BPMP command response.

The failure can be fixed by avoiding above reset in the driver,
but the explicit reset is not necessary for Tegra devices which
depend on BPMP. On such devices, BPMP ensures reset application
during unpowergate calls. Hence skip reset on these devices
which is applicable for Tegra186 and later.

Signed-off-by: Sameer Pujar <spujar@nvidia.com>
Cc: stable@vger.kernel.org
Depends-on: 87f0e46e7559 ("ALSA: hda/tegra: Reset hardware")
---
 sound/pci/hda/hda_tegra.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

Comments

Thierry Reding Dec. 7, 2021, 8:16 a.m. UTC | #1
On Tue, Dec 07, 2021 at 12:02:48PM +0530, Sameer Pujar wrote:
> HDA regression is recently reported on Tegra194 based platforms.
> This happens because "hda2codec_2x" reset does not really exist
> in Tegra194 and it causes probe failure. All the HDA based audio
> tests fail at the moment. This underlying issue is exposed by
> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
> response") which now checks return code of BPMP command response.
> 
> The failure can be fixed by avoiding above reset in the driver,
> but the explicit reset is not necessary for Tegra devices which
> depend on BPMP. On such devices, BPMP ensures reset application
> during unpowergate calls. Hence skip reset on these devices
> which is applicable for Tegra186 and later.
> 
> Signed-off-by: Sameer Pujar <spujar@nvidia.com>
> Cc: stable@vger.kernel.org
> Depends-on: 87f0e46e7559 ("ALSA: hda/tegra: Reset hardware")
> ---
>  sound/pci/hda/hda_tegra.c | 24 +++++++++++++++++++-----
>  1 file changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
> index ea700395..862141e 100644
> --- a/sound/pci/hda/hda_tegra.c
> +++ b/sound/pci/hda/hda_tegra.c
> @@ -68,6 +68,10 @@
>   */
>  #define TEGRA194_NUM_SDO_LINES	  4
>  
> +struct hda_data {
> +	unsigned int do_reset:1;
> +};

I suppose this could also be a bool. Not sure if we need to care about
packing optimizations at this point.

It may also be useful to rename this to something less generic to avoid
potential clashes with other data structures in the future. We've often
used the _soc suffix in other drivers to mark this kind of SoC-specific
data. In this case it would be struct hda_tegra_soc.

If Takashi is fine with this as-is, I don't have any strong objections,
though.

> +
>  struct hda_tegra {
>  	struct azx chip;
>  	struct device *dev;
> @@ -76,6 +80,7 @@ struct hda_tegra {
>  	unsigned int nclocks;
>  	void __iomem *regs;
>  	struct work_struct probe_work;
> +	const struct hda_data *data;
>  };
>  
>  #ifdef CONFIG_PM
> @@ -427,8 +432,13 @@ static int hda_tegra_create(struct snd_card *card,
>  	return 0;
>  }
>  
> +static const struct hda_data tegra30_data = {
> +	.do_reset = 1,
> +};
> +
>  static const struct of_device_id hda_tegra_match[] = {
> -	{ .compatible = "nvidia,tegra30-hda" },
> +	{ .compatible = "nvidia,tegra30-hda", .data = &tegra30_data },
> +	{ .compatible = "nvidia,tegra186-hda" },
>  	{ .compatible = "nvidia,tegra194-hda" },
>  	{},
>  };

One other thing we've done in the past is to explicitly pass these
structures for each compatible string. That simplifies things a bit
because we don't have to keep checking for non-NULL pointers and instead
rely on the fact that there's always a valid pointer.

To do so, you'd basically add:

	static const struct hda_data tegra186_data = {
		.do_reset = 0,
	};

And reference that for both the Tegra186 and Tegra194 entries. Again,
not strictly necessary and since we have only one occurrence where we
need to check this, it seems fine as-is, so:

Acked-by: Thierry Reding <treding@nvidia.com>
Takashi Iwai Dec. 7, 2021, 8:36 a.m. UTC | #2
On Tue, 07 Dec 2021 09:16:43 +0100,
Thierry Reding wrote:
> 
> On Tue, Dec 07, 2021 at 12:02:48PM +0530, Sameer Pujar wrote:
> > HDA regression is recently reported on Tegra194 based platforms.
> > This happens because "hda2codec_2x" reset does not really exist
> > in Tegra194 and it causes probe failure. All the HDA based audio
> > tests fail at the moment. This underlying issue is exposed by
> > commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
> > response") which now checks return code of BPMP command response.
> > 
> > The failure can be fixed by avoiding above reset in the driver,
> > but the explicit reset is not necessary for Tegra devices which
> > depend on BPMP. On such devices, BPMP ensures reset application
> > during unpowergate calls. Hence skip reset on these devices
> > which is applicable for Tegra186 and later.
> > 
> > Signed-off-by: Sameer Pujar <spujar@nvidia.com>
> > Cc: stable@vger.kernel.org
> > Depends-on: 87f0e46e7559 ("ALSA: hda/tegra: Reset hardware")
> > ---
> >  sound/pci/hda/hda_tegra.c | 24 +++++++++++++++++++-----
> >  1 file changed, 19 insertions(+), 5 deletions(-)
> > 
> > diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
> > index ea700395..862141e 100644
> > --- a/sound/pci/hda/hda_tegra.c
> > +++ b/sound/pci/hda/hda_tegra.c
> > @@ -68,6 +68,10 @@
> >   */
> >  #define TEGRA194_NUM_SDO_LINES	  4
> >  
> > +struct hda_data {
> > +	unsigned int do_reset:1;
> > +};
> 
> I suppose this could also be a bool. Not sure if we need to care about
> packing optimizations at this point.
> 
> It may also be useful to rename this to something less generic to avoid
> potential clashes with other data structures in the future. We've often
> used the _soc suffix in other drivers to mark this kind of SoC-specific
> data. In this case it would be struct hda_tegra_soc.
> 
> If Takashi is fine with this as-is, I don't have any strong objections,
> though.

Indeed, a more specific prefix would be better for avoiding a possible
conflict in the future, but the struct name is local, so I don't mind
using the simple name for now.  We can change it later when needed,
too.

> > +
> >  struct hda_tegra {
> >  	struct azx chip;
> >  	struct device *dev;
> > @@ -76,6 +80,7 @@ struct hda_tegra {
> >  	unsigned int nclocks;
> >  	void __iomem *regs;
> >  	struct work_struct probe_work;
> > +	const struct hda_data *data;
> >  };
> >  
> >  #ifdef CONFIG_PM
> > @@ -427,8 +432,13 @@ static int hda_tegra_create(struct snd_card *card,
> >  	return 0;
> >  }
> >  
> > +static const struct hda_data tegra30_data = {
> > +	.do_reset = 1,
> > +};
> > +
> >  static const struct of_device_id hda_tegra_match[] = {
> > -	{ .compatible = "nvidia,tegra30-hda" },
> > +	{ .compatible = "nvidia,tegra30-hda", .data = &tegra30_data },
> > +	{ .compatible = "nvidia,tegra186-hda" },
> >  	{ .compatible = "nvidia,tegra194-hda" },
> >  	{},
> >  };
> 
> One other thing we've done in the past is to explicitly pass these
> structures for each compatible string. That simplifies things a bit
> because we don't have to keep checking for non-NULL pointers and instead
> rely on the fact that there's always a valid pointer.
> 
> To do so, you'd basically add:
> 
> 	static const struct hda_data tegra186_data = {
> 		.do_reset = 0,
> 	};
> 
> And reference that for both the Tegra186 and Tegra194 entries. Again,
> not strictly necessary and since we have only one occurrence where we
> need to check this, it seems fine as-is, so:
> 
> Acked-by: Thierry Reding <treding@nvidia.com>

That's true, too.  OTOH, completely without a NULL check would be also
unsafe, so some sanity check would be still required.

That said, the current patch is good enough for taking as a regression
fix, but I'm fine to wait for a while for v2 to address those, too :)


thanks,

Takashi
Sameer Pujar Dec. 7, 2021, 9:09 a.m. UTC | #3
On 12/7/2021 2:06 PM, Takashi Iwai wrote:
> On Tue, 07 Dec 2021 09:16:43 +0100,
> Thierry Reding wrote:
>> I suppose this could also be a bool. Not sure if we need to care about
>> packing optimizations at this point.
>>
>> It may also be useful to rename this to something less generic to avoid
>> potential clashes with other data structures in the future. We've often
>> used the _soc suffix in other drivers to mark this kind of SoC-specific
>> data. In this case it would be struct hda_tegra_soc.
>>
>> If Takashi is fine with this as-is, I don't have any strong objections,
>> though.
> Indeed, a bit more prefix would be better for avoiding the possible
> conflict in future, but the struct name is local, so I don't mind to
> use the simple name for now.  We can change it later once when needed,
> too.

[...]

>>
>> One other thing we've done in the past is to explicitly pass these
>> structures for each compatible string. That simplifies things a bit
>> because we don't have to keep checking for non-NULL pointers and instead
>> rely on the fact that there's always a valid pointer.
>>
>> To do so, you'd basically add:
>>
>>        static const struct hda_data tegra186_data = {
>>                .do_reset = 0,
>>        };
>>
>> And reference that for both the Tegra186 and Tegra194 entries. Again,
>> not strictly necessary and since we have only one occurrence where we
>> need to check this, it seems fine as-is, so:
>>
>> Acked-by: Thierry Reding <treding@nvidia.com>
> That's true, too.  OTOH, completely without a NULL check would be also
> unsafe, so some sanity check would be still required.
>
> That said, the current patch is good enough for taking as a regression
> fix, but I'm fine to wait for a while for v2 to address those, too :)
>

Let me send a v2 for above.
Dmitry Osipenko Dec. 7, 2021, 10:22 a.m. UTC | #4
07.12.2021 09:32, Sameer Pujar пишет:
> HDA regression is recently reported on Tegra194 based platforms.
> This happens because "hda2codec_2x" reset does not really exist
> in Tegra194 and it causes probe failure. All the HDA based audio
> tests fail at the moment. This underlying issue is exposed by
> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
> response") which now checks return code of BPMP command response.
> 
> The failure can be fixed by avoiding above reset in the driver,
> but the explicit reset is not necessary for Tegra devices which
> depend on BPMP. On such devices, BPMP ensures reset application
> during unpowergate calls. Hence skip reset on these devices
> which is applicable for Tegra186 and later.

The power domain is shared with the display, AFAICS. The point of reset
is to bring h/w into predictable state. It doesn't make sense to me to
skip the reset.

If T194+ doesn't have hda2codec_2x reset, then don't request that reset
for T194+.
Dmitry Osipenko Dec. 7, 2021, 10:44 a.m. UTC | #5
07.12.2021 13:22, Dmitry Osipenko пишет:
> 07.12.2021 09:32, Sameer Pujar пишет:
>> HDA regression is recently reported on Tegra194 based platforms.
>> This happens because "hda2codec_2x" reset does not really exist
>> in Tegra194 and it causes probe failure. All the HDA based audio
>> tests fail at the moment. This underlying issue is exposed by
>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>> response") which now checks return code of BPMP command response.
>>
>> The failure can be fixed by avoiding above reset in the driver,
>> but the explicit reset is not necessary for Tegra devices which
>> depend on BPMP. On such devices, BPMP ensures reset application
>> during unpowergate calls. Hence skip reset on these devices
>> which is applicable for Tegra186 and later.
> 
> The power domain is shared with the display, AFAICS. The point of reset
> is to bring h/w into predictable state. It doesn't make sense to me to
> skip the reset.
> 
> If T194+ doesn't have hda2codec_2x reset, then don't request that reset
> for T194+.
> 

I don't see the problem in the driver. It's only the device-tree that is
wrong. This hda_tegra.c patch should be unneeded, please fix only the
device-tree.
Dmitry Osipenko Dec. 7, 2021, 10:58 a.m. UTC | #6
07.12.2021 13:44, Dmitry Osipenko пишет:
> 07.12.2021 13:22, Dmitry Osipenko пишет:
>> 07.12.2021 09:32, Sameer Pujar пишет:
>>> HDA regression is recently reported on Tegra194 based platforms.
>>> This happens because "hda2codec_2x" reset does not really exist
>>> in Tegra194 and it causes probe failure. All the HDA based audio
>>> tests fail at the moment. This underlying issue is exposed by
>>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>>> response") which now checks return code of BPMP command response.

I see that this BPMP commit already has been reverted. There is no
problem in this hda_tegra driver at all.

>>> The failure can be fixed by avoiding above reset in the driver,
>>> but the explicit reset is not necessary for Tegra devices which
>>> depend on BPMP. On such devices, BPMP ensures reset application
>>> during unpowergate calls. Hence skip reset on these devices
>>> which is applicable for Tegra186 and later.
>>
>> The power domain is shared with the display, AFAICS. The point of reset
>> is to bring h/w into predictable state. It doesn't make sense to me to
>> skip the reset.
>>
>> If T194+ doesn't have hda2codec_2x reset, then don't request that reset
>> for T194+.
>>
> 
> I don't see the problem in the driver. It's only the device-tree that is
> wrong. This hda_tegra.c patch should be unneeded, please fix only the
> device-tree.
>
Jon Hunter Dec. 7, 2021, 11:02 a.m. UTC | #7
On 07/12/2021 10:58, Dmitry Osipenko wrote:
> 07.12.2021 13:44, Dmitry Osipenko пишет:
>> 07.12.2021 13:22, Dmitry Osipenko пишет:
>>> 07.12.2021 09:32, Sameer Pujar пишет:
>>>> HDA regression is recently reported on Tegra194 based platforms.
>>>> This happens because "hda2codec_2x" reset does not really exist
>>>> in Tegra194 and it causes probe failure. All the HDA based audio
>>>> tests fail at the moment. This underlying issue is exposed by
>>>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>>>> response") which now checks return code of BPMP command response.
> 
> I see that this BPMP commit already has been reverted. There is no
> problem in this hda_tegra driver at all.

That is temporary until this fix is merged and then we will revert the 
revert.

Jon
Dmitry Osipenko Dec. 7, 2021, 11:57 a.m. UTC | #8
07.12.2021 14:02, Jon Hunter пишет:
> 
> On 07/12/2021 10:58, Dmitry Osipenko wrote:
>> 07.12.2021 13:44, Dmitry Osipenko пишет:
>>> 07.12.2021 13:22, Dmitry Osipenko пишет:
>>>> 07.12.2021 09:32, Sameer Pujar пишет:
>>>>> HDA regression is recently reported on Tegra194 based platforms.
>>>>> This happens because "hda2codec_2x" reset does not really exist
>>>>> in Tegra194 and it causes probe failure. All the HDA based audio
>>>>> tests fail at the moment. This underlying issue is exposed by
>>>>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>>>>> response") which now checks return code of BPMP command response.
>>
>> I see that this BPMP commit already has been reverted. There is no
>> problem in this hda_tegra driver at all.
> 
> That is temporary until this fix is merged and then we will revert the
> revert.

It's the device-tree that is broken, not the driver. If you don't care
about broken HDMI audio using outdated dtb, then there is nothing to fix
in the code.

Otherwise, the fix is to skip the non-existent reset.

You could add workaround to the BPMP reset driver by making it always
return success for TEGRA194_RESET_HDA2CODEC_2X invocation instead of
making the FW call for that reset.
Sameer Pujar Dec. 7, 2021, noon UTC | #9
On 12/7/2021 3:52 PM, Dmitry Osipenko wrote:
> 07.12.2021 09:32, Sameer Pujar пишет:
>> HDA regression is recently reported on Tegra194 based platforms.
>> This happens because "hda2codec_2x" reset does not really exist
>> in Tegra194 and it causes probe failure. All the HDA based audio
>> tests fail at the moment. This underlying issue is exposed by
>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>> response") which now checks return code of BPMP command response.
>>
>> The failure can be fixed by avoiding above reset in the driver,
>> but the explicit reset is not necessary for Tegra devices which
>> depend on BPMP. On such devices, BPMP ensures reset application
>> during unpowergate calls. Hence skip reset on these devices
>> which is applicable for Tegra186 and later.
> The power domain is shared with the display, AFAICS. The point of reset
> is to bring h/w into predictable state. It doesn't make sense to me to
> skip the reset.

Yes the power-domain is shared with display. As mentioned above, 
explicit reset in driver is not really necessary since BPMP is already 
doing it during unpowergate stage. So the h/w is already ensured to be 
in a good state.
Dmitry Osipenko Dec. 7, 2021, 12:05 p.m. UTC | #10
07.12.2021 15:00, Sameer Pujar пишет:
> 
> 
> On 12/7/2021 3:52 PM, Dmitry Osipenko wrote:
>> 07.12.2021 09:32, Sameer Pujar пишет:
>>> HDA regression is recently reported on Tegra194 based platforms.
>>> This happens because "hda2codec_2x" reset does not really exist
>>> in Tegra194 and it causes probe failure. All the HDA based audio
>>> tests fail at the moment. This underlying issue is exposed by
>>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>>> response") which now checks return code of BPMP command response.
>>>
>>> The failure can be fixed by avoiding above reset in the driver,
>>> but the explicit reset is not necessary for Tegra devices which
>>> depend on BPMP. On such devices, BPMP ensures reset application
>>> during unpowergate calls. Hence skip reset on these devices
>>> which is applicable for Tegra186 and later.
>> The power domain is shared with the display, AFAICS. The point of reset
>> is to bring h/w into predictable state. It doesn't make sense to me to
>> skip the reset.
> 
> Yes the power-domain is shared with display. As mentioned above,
> explicit reset in driver is not really necessary since BPMP is already
> doing it during unpowergate stage. So the h/w is already ensured to be
> in a good state.

If you'll reload the driver module, then h/w won't be reset.
Sameer Pujar Dec. 7, 2021, 12:40 p.m. UTC | #11
On 12/7/2021 5:35 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> 07.12.2021 15:00, Sameer Pujar пишет:
>>
>> On 12/7/2021 3:52 PM, Dmitry Osipenko wrote:
>>> 07.12.2021 09:32, Sameer Pujar пишет:
>>>> HDA regression is recently reported on Tegra194 based platforms.
>>>> This happens because "hda2codec_2x" reset does not really exist
>>>> in Tegra194 and it causes probe failure. All the HDA based audio
>>>> tests fail at the moment. This underlying issue is exposed by
>>>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>>>> response") which now checks return code of BPMP command response.
>>>>
>>>> The failure can be fixed by avoiding above reset in the driver,
>>>> but the explicit reset is not necessary for Tegra devices which
>>>> depend on BPMP. On such devices, BPMP ensures reset application
>>>> during unpowergate calls. Hence skip reset on these devices
>>>> which is applicable for Tegra186 and later.
>>> The power domain is shared with the display, AFAICS. The point of reset
>>> is to bring h/w into predictable state. It doesn't make sense to me to
>>> skip the reset.
>> Yes the power-domain is shared with display. As mentioned above,
>> explicit reset in driver is not really necessary since BPMP is already
>> doing it during unpowergate stage. So the h/w is already ensured to be
>> in a good state.
> If you'll reload the driver module, then h/w won't be reset.

How would the reload case be different? Can you please provide more
details if you are referring to a particular scenario?
Dmitry Osipenko Dec. 7, 2021, 2:07 p.m. UTC | #12
07.12.2021 15:40, Sameer Pujar пишет:
> 
> 
> On 12/7/2021 5:35 PM, Dmitry Osipenko wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> 07.12.2021 15:00, Sameer Pujar пишет:
>>>
>>> On 12/7/2021 3:52 PM, Dmitry Osipenko wrote:
>>>> 07.12.2021 09:32, Sameer Pujar пишет:
>>>>> HDA regression is recently reported on Tegra194 based platforms.
>>>>> This happens because "hda2codec_2x" reset does not really exist
>>>>> in Tegra194 and it causes probe failure. All the HDA based audio
>>>>> tests fail at the moment. This underlying issue is exposed by
>>>>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>>>>> response") which now checks return code of BPMP command response.
>>>>>
>>>>> The failure can be fixed by avoiding above reset in the driver,
>>>>> but the explicit reset is not necessary for Tegra devices which
>>>>> depend on BPMP. On such devices, BPMP ensures reset application
>>>>> during unpowergate calls. Hence skip reset on these devices
>>>>> which is applicable for Tegra186 and later.
>>>> The power domain is shared with the display, AFAICS. The point of reset
>>>> is to bring h/w into predictable state. It doesn't make sense to me to
>>>> skip the reset.
>>> Yes the power-domain is shared with display. As mentioned above,
>>> explicit reset in driver is not really necessary since BPMP is already
>>> doing it during unpowergate stage. So the h/w is already ensured to be
>>> in a good state.
>> If you'll reload the driver module, then h/w won't be reset.
> 
> How the reload case would be different? Can you please specify more
> details if you are referring to a particular scenario?

You have a shared power domain. Since a power domain can be turned off
only when nobody keeps it turned on, you are now making the reset of the
HDA controller dependent on the state of the display driver. Do you want
to have inconsistent h/w reset behaviour depending on the runtime state
of the display driver?
Sameer Pujar Dec. 7, 2021, 2:49 p.m. UTC | #13
On 12/7/2021 7:37 PM, Dmitry Osipenko wrote:
> 07.12.2021 15:40, Sameer Pujar пишет:
>>
>> On 12/7/2021 5:35 PM, Dmitry Osipenko wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> 07.12.2021 15:00, Sameer Pujar пишет:
>>>> On 12/7/2021 3:52 PM, Dmitry Osipenko wrote:
>>>>> 07.12.2021 09:32, Sameer Pujar пишет:
>>>>>> HDA regression is recently reported on Tegra194 based platforms.
>>>>>> This happens because "hda2codec_2x" reset does not really exist
>>>>>> in Tegra194 and it causes probe failure. All the HDA based audio
>>>>>> tests fail at the moment. This underlying issue is exposed by
>>>>>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>>>>>> response") which now checks return code of BPMP command response.
>>>>>>
>>>>>> The failure can be fixed by avoiding above reset in the driver,
>>>>>> but the explicit reset is not necessary for Tegra devices which
>>>>>> depend on BPMP. On such devices, BPMP ensures reset application
>>>>>> during unpowergate calls. Hence skip reset on these devices
>>>>>> which is applicable for Tegra186 and later.
>>>>> The power domain is shared with the display, AFAICS. The point of reset
>>>>> is to bring h/w into predictable state. It doesn't make sense to me to
>>>>> skip the reset.
>>>> Yes the power-domain is shared with display. As mentioned above,
>>>> explicit reset in driver is not really necessary since BPMP is already
>>>> doing it during unpowergate stage. So the h/w is already ensured to be
>>>> in a good state.
>>> If you'll reload the driver module, then h/w won't be reset.
>> How the reload case would be different? Can you please specify more
>> details if you are referring to a particular scenario?
> You have a shared power domain. Since power domain can be turned off
> only when nobody keeps domain turned on, you now making reset of HDA
> controller dependent on the state of display driver.

I don't think the state of the display driver would affect this. The HDA
driver itself can issue unpowergate calls which in turn ensures h/w 
reset. If display driver is already runtime active, HDA driver runtime 
resume after this would be still fine since h/w reset is already applied 
during display runtime resume. Note that both HDA and display resets are 
connected to this power-domain and BPMP applies these resets during 
unpowergate.

> Do you want to have
> inconsistent h/w reset behaviour depending on the runtime state of
> display driver?

Of course no.
Jon Hunter Dec. 7, 2021, 3:07 p.m. UTC | #14
On 07/12/2021 11:57, Dmitry Osipenko wrote:
> 07.12.2021 14:02, Jon Hunter пишет:
>>
>> On 07/12/2021 10:58, Dmitry Osipenko wrote:
>>> 07.12.2021 13:44, Dmitry Osipenko пишет:
>>>> 07.12.2021 13:22, Dmitry Osipenko пишет:
>>>>> 07.12.2021 09:32, Sameer Pujar пишет:
>>>>>> HDA regression is recently reported on Tegra194 based platforms.
>>>>>> This happens because "hda2codec_2x" reset does not really exist
>>>>>> in Tegra194 and it causes probe failure. All the HDA based audio
>>>>>> tests fail at the moment. This underlying issue is exposed by
>>>>>> commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP
>>>>>> response") which now checks return code of BPMP command response.
>>>
>>> I see that this BPMP commit already has been reverted. There is no
>>> problem in this hda_tegra driver at all.
>>
>> That is temporary until this fix is merged and then we will revert the
>> revert.
> 
> It's the device-tree that is broken, not the driver. If you don't care
> about broken HDMI audio using outdated dtb, then there is nothing to fix
> in the code.

That's correct. However, we do care about HDMI audio being broken with 
existing DTBs and so we need to make sure they still work.

Jon
Dmitry Osipenko Dec. 7, 2021, 3:35 p.m. UTC | #15
07.12.2021 17:49, Sameer Pujar пишет:
...
>>> How the reload case would be different? Can you please specify more
>>> details if you are referring to a particular scenario?
>> You have a shared power domain. Since power domain can be turned off
>> only when nobody keeps domain turned on, you now making reset of HDA
>> controller dependent on the state of display driver.
> 
> I don't think that the state of display driver would affect. The HDA
> driver itself can issue unpowergate calls which in turn ensures h/w
> reset. If display driver is already runtime active, HDA driver runtime
> resume after this would be still fine since h/w reset is already applied
> during display runtime resume. Note that both HDA and display resets are
> connected to this power-domain and BPMP applies these resets during
> unpowergate.

HDA won't be reset while display is active on T186+.
HDA will be reset while display is inactive on T186+.
HDA will be reset regardless of display state on pre-T186.

This is a pure inconsistency of the reset behaviour. Please don't do it.
Sameer Pujar Dec. 7, 2021, 5:37 p.m. UTC | #16
On 12/7/2021 9:05 PM, Dmitry Osipenko wrote:
> External email: Use caution opening links or attachments
>
>
> 07.12.2021 17:49, Sameer Pujar пишет:
> ...
>>>> How the reload case would be different? Can you please specify more
>>>> details if you are referring to a particular scenario?
>>> You have a shared power domain. Since power domain can be turned off
>>> only when nobody keeps domain turned on, you now making reset of HDA
>>> controller dependent on the state of display driver.
>> I don't think that the state of display driver would affect. The HDA
>> driver itself can issue unpowergate calls which in turn ensures h/w
>> reset. If display driver is already runtime active, HDA driver runtime
>> resume after this would be still fine since h/w reset is already applied
>> during display runtime resume. Note that both HDA and display resets are
>> connected to this power-domain and BPMP applies these resets during
>> unpowergate.
> HDA won't be reset while display is active on T186+.

No. HDA reset is applied whenever power-domain is ungated. It can happen 
when either HDA or display device becomes active. So I don't think that 
it is inconsistent.
Dmitry Osipenko Dec. 7, 2021, 6:02 p.m. UTC | #17
07.12.2021 20:37, Sameer Pujar пишет:
..
>> HDA won't be reset while display is active on T186+.
> 
> No. HDA reset is applied whenever power-domain is ungated. It can happen
> when either HDA or display device becomes active.

I said "display is active", where do you see "becomes active"?

> So I don't think that
> it is inconsistent.

If display is already active, then shared power domain is already
ungated. It won't be ungated second time in a row, HDA won't get the reset.
Sameer Pujar Dec. 8, 2021, 5:22 a.m. UTC | #18
On 12/7/2021 11:32 PM, Dmitry Osipenko wrote
> If display is already active, then shared power domain is already
> ungated.

If display is already active, then shared power domain is already 
ungated. HDA reset is already applied during this ungate. In other 
words, HDA would be reset as well when display ungates power-domain.
Dmitry Osipenko Dec. 8, 2021, 12:05 p.m. UTC | #19
08.12.2021 08:22, Sameer Pujar пишет:
> 
> 
> On 12/7/2021 11:32 PM, Dmitry Osipenko wrote
>> If display is already active, then shared power domain is already
>> ungated.
> 
> If display is already active, then shared power domain is already
> ungated. HDA reset is already applied during this ungate. In other
> words, HDA would be reset as well when display ungates power-domain.

Now, if you'll reload the HDA driver module while display is active,
you'll get a different reset behaviour. HDA hardware will be reset on
pre-T186, on T186+ it won't be reset.

Please make v2 using devm_reset_control_bulk_get_exclusive(), skipping
the non-existent reset, or move the workaround to the BPMP driver like I
suggested in the other reply.
Sameer Pujar Dec. 14, 2021, 6:02 a.m. UTC | #20
On 12/8/2021 5:35 PM, Dmitry Osipenko wrote:
> 08.12.2021 08:22, Sameer Pujar пишет:
>>
>> On 12/7/2021 11:32 PM, Dmitry Osipenko wrote
>>> If display is already active, then shared power domain is already
>>> ungated.
>> If display is already active, then shared power domain is already
>> ungated. HDA reset is already applied during this ungate. In other
>> words, HDA would be reset as well when display ungates power-domain.
> Now, if you'll reload the HDA driver module while display is active,
> you'll get a different reset behaviour. HDA hardware will be reset on
> pre-T186, on T186+ it won't be reset.

How is the reset behavior different? At the point when the HDA driver is
loaded, the HW has already been reset during display ungate. What matters
during HDA driver load is whether the HW is in a predictable state or not,
and the answer is yes. So I am not sure what problem you are referring
to. The question is: if BPMP already ensures this, then why does the
driver need to take care of it?
Dmitry Osipenko Dec. 14, 2021, 6:09 a.m. UTC | #21
14.12.2021 09:02, Sameer Pujar пишет:
> 
> 
> On 12/8/2021 5:35 PM, Dmitry Osipenko wrote:
>> 08.12.2021 08:22, Sameer Pujar пишет:
>>>
>>> On 12/7/2021 11:32 PM, Dmitry Osipenko wrote
>>>> If display is already active, then shared power domain is already
>>>> ungated.
>>> If display is already active, then shared power domain is already
>>> ungated. HDA reset is already applied during this ungate. In other
>>> words, HDA would be reset as well when display ungates power-domain.
>> Now, if you'll reload the HDA driver module while display is active,
>> you'll get a different reset behaviour. HDA hardware will be reset on
>> pre-T186, on T186+ it won't be reset.
> 
> How the reset behavior is different? At this point when HDA driver is
> loaded the HW is already reset during display ungate. What matters,
> during HDA driver load, is whether the HW is in predictable state or not
> and the answer is yes. So I am not sure what problem you are referring
> to. Question is, if BPMP already ensures this, then why driver needs to
> take care of it.

1. Enable display
2. Play audio over HDMI
3. HDA hardware now is in dirty state
4. Reload HDA driver
5. In your case HDA is kept in the dirty state; in mine it is not
Dmitry Osipenko Dec. 14, 2021, 6:15 a.m. UTC | #22
14.12.2021 09:09, Dmitry Osipenko пишет:
> 14.12.2021 09:02, Sameer Pujar пишет:
>>
>>
>> On 12/8/2021 5:35 PM, Dmitry Osipenko wrote:
>>> 08.12.2021 08:22, Sameer Pujar пишет:
>>>>
>>>> On 12/7/2021 11:32 PM, Dmitry Osipenko wrote
>>>>> If display is already active, then shared power domain is already
>>>>> ungated.
>>>> If display is already active, then shared power domain is already
>>>> ungated. HDA reset is already applied during this ungate. In other
>>>> words, HDA would be reset as well when display ungates power-domain.
>>> Now, if you'll reload the HDA driver module while display is active,
>>> you'll get a different reset behaviour. HDA hardware will be reset on
>>> pre-T186, on T186+ it won't be reset.
>>
>> How the reset behavior is different? At this point when HDA driver is
>> loaded the HW is already reset during display ungate. What matters,
>> during HDA driver load, is whether the HW is in predictable state or not
>> and the answer is yes. So I am not sure what problem you are referring
>> to. Question is, if BPMP already ensures this, then why driver needs to
>> take care of it.
> 
> 1. Enable display
> 2. Play audio over HDMI
> 3. HDA hardware now is in dirty state
> 4. Reload HDA driver
> 5. In your case HDA is kept in dirty state, in my no
> 

The power domain is shared by display and HDA, is this correct?

If yes, then the shared power domain will be turned off only when all
its clients are turned off, i.e. both display and HDA simultaneously.
Sameer Pujar Dec. 14, 2021, 7:22 a.m. UTC | #23
On 12/14/2021 11:39 AM, Dmitry Osipenko wrote:
> 14.12.2021 09:02, Sameer Pujar пишет:
>>
>> On 12/8/2021 5:35 PM, Dmitry Osipenko wrote:
>>> 08.12.2021 08:22, Sameer Pujar пишет:
>>>> On 12/7/2021 11:32 PM, Dmitry Osipenko wrote
>>>>> If display is already active, then shared power domain is already
>>>>> ungated.
>>>> If display is already active, then shared power domain is already
>>>> ungated. HDA reset is already applied during this ungate. In other
>>>> words, HDA would be reset as well when display ungates power-domain.
>>> Now, if you'll reload the HDA driver module while display is active,
>>> you'll get a different reset behaviour. HDA hardware will be reset on
>>> pre-T186, on T186+ it won't be reset.
>> How the reset behavior is different? At this point when HDA driver is
>> loaded the HW is already reset during display ungate. What matters,
>> during HDA driver load, is whether the HW is in predictable state or not
>> and the answer is yes. So I am not sure what problem you are referring
>> to. Question is, if BPMP already ensures this, then why driver needs to
>> take care of it.
> 1. Enable display
> 2. Play audio over HDMI

> 3. HDA hardware now is in dirty state

Why would this be a dirty state? It is rather a functional state, isn't
it? The power domain is ON while all this happens.

Another point is that, with the present logic, the reset is not applied on
every runtime PM resume of the HDA device, which is confusing. It depends on
the state of the 'chip->running' flag, and I don't see this getting cleared
anywhere. Would you say that subsequent HDA playback happens in a dirty state?
Dmitry Osipenko Dec. 14, 2021, 1:56 p.m. UTC | #24
14.12.2021 10:22, Sameer Pujar пишет:
...
>>> How the reset behavior is different? At this point when HDA driver is
>>> loaded the HW is already reset during display ungate. What matters,
>>> during HDA driver load, is whether the HW is in predictable state or not
>>> and the answer is yes. So I am not sure what problem you are referring
>>> to. Question is, if BPMP already ensures this, then why driver needs to
>>> take care of it.
>> 1. Enable display
>> 2. Play audio over HDMI
> 
>> 3. HDA hardware now is in dirty state
> 
> Why this would be a dirty state? It is rather a functional state. Isn't
> it? Power-domain is ON while all this happens.

In general the state should be functional, but we shouldn't assume that.
There is always a possibility for a subtle bug in a driver that may put
h/w into a bad state. Full hardware reset is encouraged by users.

> Another point is, with present logic the reset is not applied for every
> runtime PM resume of HDA device, which is confusing. It depends on the
> state of 'chip->running' flag and I don't see this getting cleared
> anywhere. Would you say subsequent HDA playback happen under a dirty state?

This is a good point. There should be another potential problem in the
HDA driver for newer SoCs because apparently we don't re-initialize HDA
controller properly after runtime PM resume.

See hda_tegra_first_init() that is invoked only during driver probe, it
configures FPCI_DBG_CFG_2 register on T194, which isn't done by
hda_tegra_init(), and thus, this register may be in reset state after
resume from RPM suspend. It should be a bug in the HDA driver that needs
to be fixed.

On older SoCs: HDA resides in the APB power domain which could be
disabled only across system suspend/resume. HDA is only clock-gated
during runtime PM suspend.

On newer SoCs: HDA power state could be lost after RPM suspend/resume,
depending on the state of display. I'm wondering whether HDMI playback
works after DPMS on T194+, I assume this case was never tested properly.

It looks like it should be safe to reset HDA on runtime PM resume
regardless of the chip->running, and thus, we could remove that check
and reset HDA unconditionally. It would be great if you could check/test and
improve this in the driver.

I'm also wondering whether snd_power_change_state() should be moved into
RPM callbacks and whether this function does anything practically useful
on Tegra at all.
Takashi Iwai Dec. 14, 2021, 2:29 p.m. UTC | #25
On Tue, 14 Dec 2021 14:56:12 +0100,
Dmitry Osipenko wrote:
> 
> 14.12.2021 10:22, Sameer Pujar пишет:
> ...
> >>> How the reset behavior is different? At this point when HDA driver is
> >>> loaded the HW is already reset during display ungate. What matters,
> >>> during HDA driver load, is whether the HW is in predictable state or not
> >>> and the answer is yes. So I am not sure what problem you are referring
> >>> to. Question is, if BPMP already ensures this, then why driver needs to
> >>> take care of it.
> >> 1. Enable display
> >> 2. Play audio over HDMI
> > 
> >> 3. HDA hardware now is in dirty state
> > 
> > Why this would be a dirty state? It is rather a functional state. Isn't
> > it? Power-domain is ON while all this happens.
> 
> In general state should be a functional, but we shouldn't assume that.
> There is always a possibility for a subtle bug in a driver that may put
> h/w into a bad state. Full hardware reset is encouraged by users.
> 
> > Another point is, with present logic the reset is not applied for every
> > runtime PM resume of HDA device, which is confusing. It depends on the
> > state of 'chip->running' flag and I don't see this getting cleared
> > anywhere. Would you say subsequent HDA playback happen under a dirty state?
> 
> This is a good point. There should be another potential problem in the
> HDA driver for newer SoCs because apparently we don't re-initialize HDA
> controller properly after runtime PM resume.
> 
> See hda_tegra_first_init() that is invoked only during driver probe, it
> configures FPCI_DBG_CFG_2 register on T194, which isn't done by
> hda_tegra_init(), and thus, this register may be  in reset state after
> resume from RPM suspend. It should be a bug in the HDA driver that needs
> to be fixed.
> 
> On older SoCs: HDA resides in the APB power domain which could be
> disabled only across system suspend/resume. HDA is only clock-gated
> during runtime PM suspend.
> 
> On newer SoCs: HDA power state could be lost after RPM suspend/resume,
> depending on the state of display. I'm wondering whether HDMI playback
> works after DPMS on T194+, I assume this case was never tested properly.
> 
> It looks like it should be safe to reset HDA on runtime PM resume
> regardless of the chip->running, and thus, we could remove that check
> and reset HDA unconditionally. Will great if you could check/test and
> improve this in the driver.
> 
> I'm also wondering whether snd_power_change_state() should be moved into
> RPM callbacks and whether this function does anything practically useful
> on Tegra at all.

This call is mostly for ALSA core stuff, and not necessarily
reflecting the exact device power state.  The major role is for
controlling / blocking the device accesses at the system
suspend/resume, so it's correct to set only in the system
suspend/resume callbacks, not in runtime PM.


Takashi
Dmitry Osipenko Dec. 14, 2021, 3:34 p.m. UTC | #26
14.12.2021 17:29, Takashi Iwai пишет:
>> I'm also wondering whether snd_power_change_state() should be moved into
>> RPM callbacks and whether this function does anything practically useful
>> on Tegra at all.
> This call is mostly for ALSA core stuff, and not necessarily
> reflecting the exact device power state.  The major role is for
> controlling / blocking the device accesses at the system
> suspend/resume, so it's correct to set only in the system
> suspend/resume callbacks, not in runtime PM.
> 

Thank you for the clarification.
Sameer Pujar Dec. 20, 2021, 10:32 a.m. UTC | #27
On 12/14/2021 7:26 PM, Dmitry Osipenko wrote:
> 14.12.2021 10:22, Sameer Pujar пишет:
> ...
>>>> How the reset behavior is different? At this point when HDA driver is
>>>> loaded the HW is already reset during display ungate. What matters,
>>>> during HDA driver load, is whether the HW is in predictable state or not
>>>> and the answer is yes. So I am not sure what problem you are referring
>>>> to. Question is, if BPMP already ensures this, then why driver needs to
>>>> take care of it.
>>> 1. Enable display
>>> 2. Play audio over HDMI
>>> 3. HDA hardware now is in dirty state
>> Why this would be a dirty state? It is rather a functional state. Isn't
>> it? Power-domain is ON while all this happens.
> In general state should be a functional, but we shouldn't assume that.
> There is always a possibility for a subtle bug in a driver that may put
> h/w into a bad state. Full hardware reset is encouraged by users.

OK. I will prepare a v2 by just skipping the invalid reset for Tegra194.


>
>> Another point is, with present logic the reset is not applied for every
>> runtime PM resume of HDA device, which is confusing. It depends on the
>> state of 'chip->running' flag and I don't see this getting cleared
>> anywhere. Would you say subsequent HDA playback happen under a dirty state?
> This is a good point. There should be another potential problem in the
> HDA driver for newer SoCs because apparently we don't re-initialize HDA
> controller properly after runtime PM resume.
>
> See hda_tegra_first_init() that is invoked only during driver probe, it
> configures FPCI_DBG_CFG_2 register on T194, which isn't done by
> hda_tegra_init(), and thus, this register may be  in reset state after
> resume from RPM suspend. It should be a bug in the HDA driver that needs
> to be fixed.
>
> On older SoCs: HDA resides in the APB power domain which could be
> disabled only across system suspend/resume. HDA is only clock-gated
> during runtime PM suspend.
>
> On newer SoCs: HDA power state could be lost after RPM suspend/resume,
> depending on the state of display. I'm wondering whether HDMI playback
> works after DPMS on T194+, I assume this case was never tested properly.
>
> It looks like it should be safe to reset HDA on runtime PM resume
> regardless of the chip->running, and thus, we could remove that check
> and reset HDA unconditionally. Will great if you could check/test and
> improve this in the driver.

There seem to be multiple issues. I will work on this separately and
send a separate series. Presently basic functionality is broken on Tegra194,
and I will first send v2 to fix the regression. Thanks for the review.
diff mbox series

Patch

diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
index ea700395..862141e 100644
--- a/sound/pci/hda/hda_tegra.c
+++ b/sound/pci/hda/hda_tegra.c
@@ -68,6 +68,10 @@ 
  */
 #define TEGRA194_NUM_SDO_LINES	  4
 
+struct hda_data {
+	unsigned int do_reset:1;
+};
+
 struct hda_tegra {
 	struct azx chip;
 	struct device *dev;
@@ -76,6 +80,7 @@  struct hda_tegra {
 	unsigned int nclocks;
 	void __iomem *regs;
 	struct work_struct probe_work;
+	const struct hda_data *data;
 };
 
 #ifdef CONFIG_PM
@@ -427,8 +432,13 @@  static int hda_tegra_create(struct snd_card *card,
 	return 0;
 }
 
+static const struct hda_data tegra30_data = {
+	.do_reset = 1,
+};
+
 static const struct of_device_id hda_tegra_match[] = {
-	{ .compatible = "nvidia,tegra30-hda" },
+	{ .compatible = "nvidia,tegra30-hda", .data = &tegra30_data },
+	{ .compatible = "nvidia,tegra186-hda" },
 	{ .compatible = "nvidia,tegra194-hda" },
 	{},
 };
@@ -449,6 +459,8 @@  static int hda_tegra_probe(struct platform_device *pdev)
 	hda->dev = &pdev->dev;
 	chip = &hda->chip;
 
+	hda->data = of_device_get_match_data(&pdev->dev);
+
 	err = snd_card_new(&pdev->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
 			   THIS_MODULE, 0, &card);
 	if (err < 0) {
@@ -456,10 +468,12 @@  static int hda_tegra_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	hda->reset = devm_reset_control_array_get_exclusive(&pdev->dev);
-	if (IS_ERR(hda->reset)) {
-		err = PTR_ERR(hda->reset);
-		goto out_free;
+	if (hda->data && hda->data->do_reset) {
+		hda->reset = devm_reset_control_array_get_exclusive(&pdev->dev);
+		if (IS_ERR(hda->reset)) {
+			err = PTR_ERR(hda->reset);
+			goto out_free;
+		}
 	}
 
 	hda->clocks[hda->nclocks++].id = "hda";