[3/5] dmaengine: Support for querying maximum transfer length (of an SG element)

Message ID 20170912104424.18495-4-peter.ujfalusi@ti.com
State New
Headers show
Series
  • dmaengine: core/edma/omap-dma: maximum SG len reporting
Related show

Commit Message

Peter Ujfalusi Sept. 12, 2017, 10:44 a.m.
Certain DMA engines have limitation on the maximum size of a transfer they
can support. This size limitation is per SG element or for period length in
cyclic transfers.
In TI's eDMA and sDMA this limitation is not really a length limit, but it
is the number of bursts that we can support in one transfer.

With this callback the DMA drivers can provide hints to clients on how they
should set up their buffers (sglist, cyclic buffer). Without this the
clients must have open coded workarounds in place for each and every DMA
engine they might be interfacing with to have correct length for the
transfers.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
---
 include/linux/dmaengine.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

Comments

Vinod Koul Sept. 21, 2017, 5:14 p.m. | #1
On Tue, Sep 12, 2017 at 01:44:22PM +0300, Peter Ujfalusi wrote:
> Certain DMA engines have limitation on the maximum size of a transfer they
> can support. This size limitation is per SG element or for period length in
> cyclic transfers.
> In TI's eDMA and sDMA this limitation is not really a length limit, but it
> is the number of bursts that we can support in one transfer.
> 
> With this callback the DMA drivers can provide hints to clients on how they
> should set up their buffers (sglist, cyclic buffer). Without this the
> clients must have open coded workarounds in place for each and every DMA
> engine they might be interfacing with to have correct length for the
> transfers.
> 
> Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
> ---
>  include/linux/dmaengine.h | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index 8319101170fc..739824b94c1b 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -705,6 +705,9 @@ struct dma_filter {
>   * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
>   * @device_config: Pushes a new configuration to a channel, return 0 or an error
>   *	code
> + * @device_get_max_len: Get the maximum supported length in bytes of a slave
> + *	transfer based on the set dma_slave_config. The length limitation
> + *	applies to each SG element's length.
>   * @device_pause: Pauses any transfer happening on a channel. Returns
>   *	0 or an error code
>   * @device_resume: Resumes any transfer on a channel previously
> @@ -792,6 +795,8 @@ struct dma_device {
>  
>  	int (*device_config)(struct dma_chan *chan,
>  			     struct dma_slave_config *config);
> +	u32 (*device_get_max_len)(struct dma_chan *chan,
> +				  enum dma_transfer_direction dir);
>  	int (*device_pause)(struct dma_chan *chan);
>  	int (*device_resume)(struct dma_chan *chan);
>  	int (*device_terminate_all)(struct dma_chan *chan);
> @@ -812,6 +817,15 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
>  	return -ENOSYS;
>  }
>  
> +static inline u32 dmaengine_slave_get_max_len(struct dma_chan *chan,
> +					      enum dma_transfer_direction dir)
> +{
> +	if (chan->device->device_get_max_len)
> +		return chan->device->device_get_max_len(chan, dir);

not another callback :)

On a serious note, why shouldn't this be one more capability in
dma_slave_caps? Looking at the next patch, it seems static.
Peter Ujfalusi Sept. 22, 2017, 9:39 a.m. | #2

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki

On 2017-09-21 20:14, Vinod Koul wrote:
> On Tue, Sep 12, 2017 at 01:44:22PM +0300, Peter Ujfalusi wrote:
>> Certain DMA engines have limitation on the maximum size of a transfer they
>> can support. This size limitation is per SG element or for period length in
>> cyclic transfers.
>> In TI's eDMA and sDMA this limitation is not really a length limit, but it
>> is the number of bursts that we can support in one transfer.
>>
>> With this callback the DMA drivers can provide hints to clients on how they
>> should set up their buffers (sglist, cyclic buffer). Without this the
>> clients must have open coded workarounds in place for each and every DMA
>> engine they might be interfacing with to have correct length for the
>> transfers.
>>
>> Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
>> ---
>>  include/linux/dmaengine.h | 14 ++++++++++++++
>>  1 file changed, 14 insertions(+)
>>
>> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
>> index 8319101170fc..739824b94c1b 100644
>> --- a/include/linux/dmaengine.h
>> +++ b/include/linux/dmaengine.h
>> @@ -705,6 +705,9 @@ struct dma_filter {
>>   * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
>>   * @device_config: Pushes a new configuration to a channel, return 0 or an error
>>   *	code
>> + * @device_get_max_len: Get the maximum supported length in bytes of a slave
>> + *	transfer based on the set dma_slave_config. The length limitation
>> + *	applies to each SG element's length.
>>   * @device_pause: Pauses any transfer happening on a channel. Returns
>>   *	0 or an error code
>>   * @device_resume: Resumes any transfer on a channel previously
>> @@ -792,6 +795,8 @@ struct dma_device {
>>  
>>  	int (*device_config)(struct dma_chan *chan,
>>  			     struct dma_slave_config *config);
>> +	u32 (*device_get_max_len)(struct dma_chan *chan,
>> +				  enum dma_transfer_direction dir);
>>  	int (*device_pause)(struct dma_chan *chan);
>>  	int (*device_resume)(struct dma_chan *chan);
>>  	int (*device_terminate_all)(struct dma_chan *chan);
>> @@ -812,6 +817,15 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
>>  	return -ENOSYS;
>>  }
>>  
>> +static inline u32 dmaengine_slave_get_max_len(struct dma_chan *chan,
>> +					      enum dma_transfer_direction dir)
>> +{
>> +	if (chan->device->device_get_max_len)
>> +		return chan->device->device_get_max_len(chan, dir);
> 
> not another callback :)
> 
> on a serious note, why shouldn't this be one more capability in
> dma_slave_caps. looking at next patch it seems static

It is not really static, the size in bytes depends on the dev_width and
the maxburst:
dev_width * burst * (SZ_64K - 1);

The number of (dev_width * burst) is static, yes. Other DMA engines
might have similar interpretation, but returning the maximum length in
bytes sounded more generic for other engines to be able to adopt.

Initially I had maxburst_cnt in struct dma_device for maximum burst
count within one SG, but it felt clumsy and not too intuitive either.

- Péter
Vinod Koul Sept. 26, 2017, 4:54 p.m. | #3
On Fri, Sep 22, 2017 at 12:39:38PM +0300, Peter Ujfalusi wrote:
> 
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
> 
> On 2017-09-21 20:14, Vinod Koul wrote:
> > On Tue, Sep 12, 2017 at 01:44:22PM +0300, Peter Ujfalusi wrote:
> >> Certain DMA engines have limitation on the maximum size of a transfer they
> >> can support. This size limitation is per SG element or for period length in
> >> cyclic transfers.
> >> In TI's eDMA and sDMA this limitation is not really a length limit, but it
> >> is the number of bursts that we can support in one transfer.
> >>
> >> With this callback the DMA drivers can provide hints to clients on how they
> >> should set up their buffers (sglist, cyclic buffer). Without this the
> >> clients must have open coded workarounds in place for each and every DMA
> >> engine they might be interfacing with to have correct length for the
> >> transfers.
> >>
> >> Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
> >> ---
> >>  include/linux/dmaengine.h | 14 ++++++++++++++
> >>  1 file changed, 14 insertions(+)
> >>
> >> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> >> index 8319101170fc..739824b94c1b 100644
> >> --- a/include/linux/dmaengine.h
> >> +++ b/include/linux/dmaengine.h
> >> @@ -705,6 +705,9 @@ struct dma_filter {
> >>   * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
> >>   * @device_config: Pushes a new configuration to a channel, return 0 or an error
> >>   *	code
> >> + * @device_get_max_len: Get the maximum supported length in bytes of a slave
> >> + *	transfer based on the set dma_slave_config. The length limitation
> >> + *	applies to each SG element's length.
> >>   * @device_pause: Pauses any transfer happening on a channel. Returns
> >>   *	0 or an error code
> >>   * @device_resume: Resumes any transfer on a channel previously
> >> @@ -792,6 +795,8 @@ struct dma_device {
> >>  
> >>  	int (*device_config)(struct dma_chan *chan,
> >>  			     struct dma_slave_config *config);
> >> +	u32 (*device_get_max_len)(struct dma_chan *chan,
> >> +				  enum dma_transfer_direction dir);
> >>  	int (*device_pause)(struct dma_chan *chan);
> >>  	int (*device_resume)(struct dma_chan *chan);
> >>  	int (*device_terminate_all)(struct dma_chan *chan);
> >> @@ -812,6 +817,15 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
> >>  	return -ENOSYS;
> >>  }
> >>  
> >> +static inline u32 dmaengine_slave_get_max_len(struct dma_chan *chan,
> >> +					      enum dma_transfer_direction dir)
> >> +{
> >> +	if (chan->device->device_get_max_len)
> >> +		return chan->device->device_get_max_len(chan, dir);
> > 
> > not another callback :)
> > 
> > on a serious note, why shouldn't this be one more capability in
> > dma_slave_caps. looking at next patch it seems static
> 
> It is not really static, the size in bytes depends on the dev_width and
> the maxburst:
> dev_width * burst * (SZ_64K - 1);

Well, DMAengines work on FIFOs. In the above you are giving the length as
SZ_64K - 1 'items', which IIUC in DMAengine terms would always translate to
bytes with respect to the width used and the burst applied.

Return length in bytes does make sense (from user PoV), but then you need to
"know" the applied  width and burst. How do you decide those?

> 
> The number of (dev_width * burst) is static, yes. Other DMA engines
> might have similar interpretation, but returning the maximum length in
> bytes sounded more generic for other engines to be able to adopt.
> 
> Initially I had maxburst_cnt in struct dma_device for maximum burst
> count within one SG, but it felt clumsy and not too intuitive either.

> 
> - Péter
>
Peter Ujfalusi Oct. 2, 2017, 11:24 a.m. | #4



Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki

On 2017-09-26 19:54, Vinod Koul wrote:
>>>
>>> not another callback :)
>>>
>>> on a serious note, why shouldn't this be one more capability in
>>> dma_slave_caps. looking at next patch it seems static
>>
>> It is not really static, the size in bytes depends on the dev_width and
>> the maxburst:
>> dev_width * burst * (SZ_64K - 1);
> 
> well DMAengines work on FIFOs, in above you are giving length as SZ_64K - 1
> 'items' which IIUC in DMAengine terms for bytes would always refer wrt width
> used and burst applied.

I think we can live with this and let the user to figure out what to do
with this information.

But I'm having hard time to figure out a good name for this. It is not
the number of SGs we can support, but the number of 'items' within one
SG that we have the limit. It could be:
u32 max_bursts_per_sg;

which would also apply to period length (for cyclic) in a similar way.

> Return length in bytes does make sense (from user PoV), but then you need to
> "know" the applied  width and burst. How do you decide those?

The number of items works for eDMA and sDMA, but we also have the cppi4.1. It
is a packet DMA and it has no understanding of bursts, address widths or
any of the 'traditional' things. It only cares about the number of bytes
we want to transfer and it has a limitation of 4194303 bytes (21 bits for
length). This is again per SG. How could this report the
'max_bursts_per_sg'?

This was one of the reasons that I have settled with the callback.

What we can also do is to code this within the DMA drivers itself.

When setting up the transfer, if we realize that one of the SGs is not
going to fit, we destroy what we have done so far and pass the sg list
along with the length/sg limit to create a new sg list where every sg item's
length is under the limit. Then using this new sg list we can set up the
transfer.

I'm not sure how hard is to do the sg list optimization, I see that
sg_split() is not what we want so we might need to code this in
dmaengine or in the scatterlist code.

We certainly don't want to verify all slave_sg transfers proactively to
avoid adding latency when it is not necessary.


>>
>> The number of (dev_width * burst) is static, yes. Other DMA engines
>> might have similar interpretation, but returning the maximum length in
>> bytes sounded more generic for other engines to be able to adopt.
>>
>> Initially I had maxburst_cnt in struct dma_device for maximum burst
>> count within one SG, but it felt clumsy and not too intuitive either.
> 
>>
>> - Péter
>>
> 

- Péter
Vinod Koul Oct. 8, 2017, 5:25 a.m. | #5
On Mon, Oct 02, 2017 at 02:24:12PM +0300, Peter Ujfalusi wrote:
> 
> 
> 
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
> 
> On 2017-09-26 19:54, Vinod Koul wrote:
> >>>
> >>> not another callback :)
> >>>
> >>> on a serious note, why shouldn't this be one more capability in
> >>> dma_slave_caps. looking at next patch it seems static
> >>
> >> It is not really static, the size in bytes depends on the dev_width and
> >> the maxburst:
> >> dev_width * burst * (SZ_64K - 1);
> > 
> > well DMAengines work on FIFOs, in above you are giving length as SZ_64K - 1
> > 'items' which IIUC in DMAengine terms for bytes would always refer wrt width
> > used and burst applied.
> 
> I think we can live with this and let the user to figure out what to do
> with this information.

Right, plus a macro for conversion :) so that users don't code buggy
conversions all over the place

> But I'm having hard time to figure out a good name for this. It is not
> the number of SGs we can support, but the number of 'items' within one
> SG that we have the limit. It could be:
> u32 max_bursts_per_sg;

this looks fine, another candidate I would use is words_per_sg and while at
it why tie it to sg? should we make it words_per_txn but then people should not
confuse with txn represented by a descriptor which can have multiple ....

> 
> which would also apply to period length (for cyclic) in a similar way.
> 
> > Return length in bytes does make sense (from user PoV), but then you need to
> > "know" the applied  width and burst. How do you decide those?
> 
> The number of items works eDMA and sDMA, but we also have the cpp41. It
> is a packet DMA and it has no understanding of bursts, address widths or
> any of the 'traditional' things. It only cares about the number of bytes
> we want to transfer and it has limitation of 4194303 bytes (21bits for
> length). This is again per SG. How this could report the
> 'max_bursts_per_sg' ?

Hmm, that is an interesting case; is this number coming from the USB side?

> This was one of the reasons that I have settled with the callback.
> 
> What we can also do is to code this within the DMA drivers itself.
> 
> When setting up the transfer and we realize that one of the SG will not
> going to fit, we destroy what we have done so far, pass the sg list
> along with length/sg limit to create a new sg list where all sg item's
> length is under the limit. Then using this new sg list we can set up the
> transfer.
> 
> I'm not sure how hard is to do the sg list optimization, I see that
> sg_split() is not what we want so we might need to code this in
> dmaengine or in the scatterlist code.
> 
> We certainly don't want to verify all slave_sg transfers proactively to
> avoid adding latency when it is not necessary.

latency would be added at prepare, not when submitting..
Peter Ujfalusi Oct. 11, 2017, 3:47 p.m. | #6

Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki

On 10/08/2017 08:25 AM, Vinod Koul wrote:
>>>> It is not really static, the size in bytes depends on the dev_width and
>>>> the maxburst:
>>>> dev_width * burst * (SZ_64K - 1);
>>>
>>> well DMAengines work on FIFOs, in above you are giving length as SZ_64K - 1
>>> 'items' which IIUC in DMAengine terms for bytes would always refer wrt width
>>> used and burst applied.
>>
>> I think we can live with this and let the user to figure out what to do
>> with this information.
> 
> Right, plus a macro for conversion :) SO that users dont code buggy
> conversions all over the place

OK, but still the naming... ;)

>> But I'm having hard time to figure out a good name for this. It is not
>> the number of SGs we can support, but the number of 'items' within one
>> SG that we have the limit. It could be:
>> u32 max_bursts_per_sg;
> 
> this looks fine, another candidate I would use is words_per_sg and while at
> it why tie it to sg? should we make it words_per_txn but then people should not
> confuse with txn represented by a descriptor which can have multiple ....

Yes, this limit is not only per SG as the same limit actually applies to
cyclic's period_len in a same way.

words_per_txn does not sound right as for me the words would refer to
dev_width number of bytes and I'm seeking on limit on the number of (dev_width
* bursts) sub-chunks.

bursts_per_chunk? With a long comment?

Which would apply to sg_dma_len in slave_sg(), period_len in cyclic() and len
in slave_single().

>>
>> which would also apply to period length (for cyclic) in a similar way.
>>
>>> Return length in bytes does make sense (from user PoV), but then you need to
>>> "know" the applied  width and burst. How do you decide those?
>>
>> The number of items works eDMA and sDMA, but we also have the cpp41. It
>> is a packet DMA and it has no understanding of bursts, address widths or
>> any of the 'traditional' things. It only cares about the number of bytes
>> we want to transfer and it has limitation of 4194303 bytes (21bits for
>> length). This is again per SG. How this could report the
>> 'max_bursts_per_sg' ?
> 
> hmmm that is intresting case, is this number coming from USB side?

No, it is coming from cppi4.1's descriptor. The maximum length of a packet is
stored in 21bits.

But this is a bit more complicated ;) The whole packet has a 21-bit size limit,
but at the same time the whole sg_dma_len() also has this limit, as we can link
several host buffer descriptors to one host packet descriptor. Each has
21 bits for length, but at the same time the sum of the hpd and the linked hbd
lengths cannot be more than what we can store in 21 bits...

> 
>> This was one of the reasons that I have settled with the callback.
>>
>> What we can also do is to code this within the DMA drivers itself.
>>
>> When setting up the transfer and we realize that one of the SG will not
>> going to fit, we destroy what we have done so far, pass the sg list
>> along with length/sg limit to create a new sg list where all sg item's
>> length is under the limit. Then using this new sg list we can set up the
>> transfer.
>>
>> I'm not sure how hard is to do the sg list optimization, I see that
>> sg_split() is not what we want so we might need to code this in
>> dmaengine or in the scatterlist code.
>>
>> We certainly don't want to verify all slave_sg transfers proactively to
>> avoid adding latency when it is not necessary.
> 
> latency would be added at prepare, not when submitting..

Yes, but you submit transfers all the time, and added latency would be crucial.

But this could be done with a DMAengine internal helper, I think.
If a DMA driver figures out that one of the SG length is over the supported
limit, then it could clean up everything and call something like:

struct dma_async_tx_descriptor *dmaengine_fixup_and_prep_slave_sg(
	struct dma_chan *chan, struct scatterlist *sgl,
	unsigned int sg_len, enum dma_transfer_direction dir,
	unsigned long flags, size_t max_sg_dma_len)

and this would split up the original SG list to a temp one meeting the
max_sg_dma_len and call chan->device->device_prep_slave_sg()

In this round the setup would succeed and the caller and I would be happy.

But if the caller is already aware of the sg_dma_len limit, it can prepare the SG
list correctly and we save the time needed to recreate the list.
Vinod Koul Oct. 12, 2017, 1:57 p.m. | #7
On Wed, Oct 11, 2017 at 06:47:18PM +0300, Peter Ujfalusi wrote:
> 
> Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
> 
> On 10/08/2017 08:25 AM, Vinod Koul wrote:
> >>>> It is not really static, the size in bytes depends on the dev_width and
> >>>> the maxburst:
> >>>> dev_width * burst * (SZ_64K - 1);
> >>>
> >>> well DMAengines work on FIFOs, in above you are giving length as SZ_64K - 1
> >>> 'items' which IIUC in DMAengine terms for bytes would always refer wrt width
> >>> used and burst applied.
> >>
> >> I think we can live with this and let the user to figure out what to do
> >> with this information.
> > 
> > Right, plus a macro for conversion :) SO that users dont code buggy
> > conversions all over the place
> 
> OK, but still the naming... ;)
> 
> >> But I'm having hard time to figure out a good name for this. It is not
> >> the number of SGs we can support, but the number of 'items' within one
> >> SG that we have the limit. It could be:
> >> u32 max_bursts_per_sg;
> > 
> > this looks fine, another candidate I would use is words_per_sg and while at
> > it why tie it to sg? should we make it words_per_txn but then people should not
> > confuse with txn represented by a descriptor which can have multiple ....
> 
> Yes, this limit is not only per SG as the same limit actually applies to
> cyclic's period_len in a same way.
> 
> words_per_txn does not sound right as for me the words would refer to
> dev_width number of bytes and I'm seeking on limit on the number of (dev_width
> * bursts) sub-chunks.
> 
> bursts_per_chunk? With a long comment?
> 
> Which would apply to sg_dma_len in slave_sg(), period_len in cyclic() and len
> in slave_single().

Sounds much better to me :)

> 
> >>
> >> which would also apply to period length (for cyclic) in a similar way.
> >>
> >>> Return length in bytes does make sense (from user PoV), but then you need to
> >>> "know" the applied  width and burst. How do you decide those?
> >>
> >> The number of items works eDMA and sDMA, but we also have the cpp41. It
> >> is a packet DMA and it has no understanding of bursts, address widths or
> >> any of the 'traditional' things. It only cares about the number of bytes
> >> we want to transfer and it has limitation of 4194303 bytes (21bits for
> >> length). This is again per SG. How this could report the
> >> 'max_bursts_per_sg' ?
> > 
> > hmmm that is intresting case, is this number coming from USB side?
> 
> No, it is coming from cppi4.1's descriptor. The maximum length of a packet is
> stored in 21bits.
> 
> But this is a bit more complicated ;) The whole packet have 21bits size limit,
> but at the same time the whole sg_dma_len() also have this as we can link a
> several host buffer descriptors to one host packet descriptor. Each have
> 21bits for length, but at the same time the sum of the hpd and the linked hbd
> length can not be more than what we can store in 21bits...
> 
> > 
> >> This was one of the reasons that I have settled with the callback.
> >>
> >> What we can also do is to code this within the DMA drivers itself.
> >>
> >> When setting up the transfer and we realize that one of the SG will not
> >> going to fit, we destroy what we have done so far, pass the sg list
> >> along with length/sg limit to create a new sg list where all sg item's
> >> length is under the limit. Then using this new sg list we can set up the
> >> transfer.
> >>
> >> I'm not sure how hard is to do the sg list optimization, I see that
> >> sg_split() is not what we want so we might need to code this in
> >> dmaengine or in the scatterlist code.
> >>
> >> We certainly don't want to verify all slave_sg transfers proactively to
> >> avoid adding latency when it is not necessary.
> > 
> > latency would be added at prepare, not when submitting..
> 
> Yes, but you submit transfers all the time, and added latency would be crucial.
> 
> But this could be done with a DMAengine internal helper, I think.
> If a DMA driver figures out that one of the SG length is over the supported
> limit, then it could clean up everything and call something like:
> 
> struct dma_async_tx_descriptor *dmaengine_fixup_and_prep_slave_sg(
> 	struct dma_chan *chan, struct scatterlist *sgl,
> 	unsigned int sg_len, enum dma_transfer_direction dir,
> 	unsigned long flags, size_t max_sg_dma_len)
> 
> and this would split up the original SG list to a temp one meeting the
> max_sg_dma_len and call chan->device->device_prep_slave_sg()
> 
> In this round the setup would succeed and the caller and I would be happy.
> 
> But if the caller already aware of the sg_dma_len limit it can prepare the SG
> list correctly and we save time to recreate the list.

Rather than having the driver figure it out, why don't we add that in the prep_
call? Since we would know the capability of the device we can do that split;
that way we have common split code in the middle, in the framework, and drivers
get an sg list which is supported by them. It can also split a list into
multiple prep_ calls based on controller support.

Yes that is a big ask, we can split up and do in stages :)

Patch

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8319101170fc..739824b94c1b 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -705,6 +705,9 @@  struct dma_filter {
  * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
  * @device_config: Pushes a new configuration to a channel, return 0 or an error
  *	code
+ * @device_get_max_len: Get the maximum supported length in bytes of a slave
+ *	transfer based on the set dma_slave_config. The length limitation
+ *	applies to each SG element's length.
  * @device_pause: Pauses any transfer happening on a channel. Returns
  *	0 or an error code
  * @device_resume: Resumes any transfer on a channel previously
@@ -792,6 +795,8 @@  struct dma_device {
 
 	int (*device_config)(struct dma_chan *chan,
 			     struct dma_slave_config *config);
+	u32 (*device_get_max_len)(struct dma_chan *chan,
+				  enum dma_transfer_direction dir);
 	int (*device_pause)(struct dma_chan *chan);
 	int (*device_resume)(struct dma_chan *chan);
 	int (*device_terminate_all)(struct dma_chan *chan);
@@ -812,6 +817,15 @@  static inline int dmaengine_slave_config(struct dma_chan *chan,
 	return -ENOSYS;
 }
 
+static inline u32 dmaengine_slave_get_max_len(struct dma_chan *chan,
+					      enum dma_transfer_direction dir)
+{
+	if (chan->device->device_get_max_len)
+		return chan->device->device_get_max_len(chan, dir);
+
+	return 0;
+}
+
 static inline bool is_slave_direction(enum dma_transfer_direction direction)
 {
 	return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM);