diff mbox

[U-Boot,3/3] MIPS: Abstract cache op loops with a macro

Message ID 20160526155850.25412-4-paul.burton@imgtec.com
State Accepted
Commit fb64cda579985e21610672eae44faf40eadd71ea
Delegated to: Daniel Schwierzeck
Headers show

Commit Message

Paul Burton May 26, 2016, 3:58 p.m. UTC
The various cache maintenance routines perform a number of loops over
cache lines. Rather than duplicate the code for performing such loops,
abstract it out into a new cache_loop macro which performs an arbitrary
number of cache ops on a range of addresses. This reduces duplication in
the existing L1 cache maintenance code & will allow for not adding
further duplication when introducing L2 cache support.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>

---

 arch/mips/lib/cache.c | 59 ++++++++++++++++-----------------------------------
 1 file changed, 18 insertions(+), 41 deletions(-)

Comments

Marek Vasut May 26, 2016, 4:13 p.m. UTC | #1
On 05/26/2016 05:58 PM, Paul Burton wrote:
> The various cache maintenance routines perform a number of loops over
> cache lines. Rather than duplicate the code for performing such loops,
> abstract it out into a new cache_loop macro which performs an arbitrary
> number of cache ops on a range of addresses. This reduces duplication in
> the existing L1 cache maintenance code & will allow for not adding
> further duplication when introducing L2 cache support.
> 
> Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> 
> ---
> 
>  arch/mips/lib/cache.c | 59 ++++++++++++++++-----------------------------------
>  1 file changed, 18 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/mips/lib/cache.c b/arch/mips/lib/cache.c
> index 2bb91c6..320335c 100644
> --- a/arch/mips/lib/cache.c
> +++ b/arch/mips/lib/cache.c
> @@ -37,82 +37,59 @@ static inline unsigned long dcache_line_size(void)
>  	return 2 << dl;
>  }
>  
> +#define cache_loop(start, end, lsize, ops...) do {			\
> +	const void *addr = (const void *)(start & ~(lsize - 1));	\
> +	const void *aend = (const void *)((end - 1) & ~(lsize - 1));	\
> +	const unsigned int cache_ops[] = { ops };			\

Can't you turn this into a function instead and pass some flags to
select the ops instead ?

> +	unsigned int i;							\
> +									\
> +	for (; addr <= aend; addr += lsize) {				\
> +		for (i = 0; i < ARRAY_SIZE(cache_ops); i++)		\
> +			mips_cache(cache_ops[i], addr);			\
> +	}								\
> +} while (0)
> +
>  void flush_cache(ulong start_addr, ulong size)
>  {
>  	unsigned long ilsize = icache_line_size();
>  	unsigned long dlsize = dcache_line_size();
> -	const void *addr, *aend;
>  
>  	/* aend will be miscalculated when size is zero, so we return here */
>  	if (size == 0)
>  		return;
>  
> -	addr = (const void *)(start_addr & ~(dlsize - 1));
> -	aend = (const void *)((start_addr + size - 1) & ~(dlsize - 1));
> -
>  	if (ilsize == dlsize) {
>  		/* flush I-cache & D-cache simultaneously */
> -		while (1) {
> -			mips_cache(HIT_WRITEBACK_INV_D, addr);
> -			mips_cache(HIT_INVALIDATE_I, addr);
> -			if (addr == aend)
> -				break;
> -			addr += dlsize;
> -		}
> +		cache_loop(start_addr, start_addr + size, ilsize,
> +			   HIT_WRITEBACK_INV_D, HIT_INVALIDATE_I);
>  		return;
>  	}
>  
>  	/* flush D-cache */
> -	while (1) {
> -		mips_cache(HIT_WRITEBACK_INV_D, addr);
> -		if (addr == aend)
> -			break;
> -		addr += dlsize;
> -	}
> +	cache_loop(start_addr, start_addr + size, dlsize, HIT_WRITEBACK_INV_D);
>  
>  	/* flush I-cache */
> -	addr = (const void *)(start_addr & ~(ilsize - 1));
> -	aend = (const void *)((start_addr + size - 1) & ~(ilsize - 1));
> -	while (1) {
> -		mips_cache(HIT_INVALIDATE_I, addr);
> -		if (addr == aend)
> -			break;
> -		addr += ilsize;
> -	}
> +	cache_loop(start_addr, start_addr + size, ilsize, HIT_INVALIDATE_I);
>  }
>  
>  void flush_dcache_range(ulong start_addr, ulong stop)
>  {
>  	unsigned long lsize = dcache_line_size();
> -	const void *addr = (const void *)(start_addr & ~(lsize - 1));
> -	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
>  
>  	/* aend will be miscalculated when size is zero, so we return here */
>  	if (start_addr == stop)
>  		return;
>  
> -	while (1) {
> -		mips_cache(HIT_WRITEBACK_INV_D, addr);
> -		if (addr == aend)
> -			break;
> -		addr += lsize;
> -	}
> +	cache_loop(start_addr, stop, lsize, HIT_WRITEBACK_INV_D);
>  }
>  
>  void invalidate_dcache_range(ulong start_addr, ulong stop)
>  {
>  	unsigned long lsize = dcache_line_size();
> -	const void *addr = (const void *)(start_addr & ~(lsize - 1));
> -	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
>  
>  	/* aend will be miscalculated when size is zero, so we return here */
>  	if (start_addr == stop)
>  		return;
>  
> -	while (1) {
> -		mips_cache(HIT_INVALIDATE_D, addr);
> -		if (addr == aend)
> -			break;
> -		addr += lsize;
> -	}
> +	cache_loop(start_addr, stop, lsize, HIT_INVALIDATE_I);
>  }
>
Paul Burton May 27, 2016, 10:30 a.m. UTC | #2
On Thu, May 26, 2016 at 06:13:17PM +0200, Marek Vasut wrote:
> On 05/26/2016 05:58 PM, Paul Burton wrote:
> > The various cache maintenance routines perform a number of loops over
> > cache lines. Rather than duplicate the code for performing such loops,
> > abstract it out into a new cache_loop macro which performs an arbitrary
> > number of cache ops on a range of addresses. This reduces duplication in
> > the existing L1 cache maintenance code & will allow for not adding
> > further duplication when introducing L2 cache support.
> > 
> > Signed-off-by: Paul Burton <paul.burton@imgtec.com>
> > 
> > ---
> > 
> >  arch/mips/lib/cache.c | 59 ++++++++++++++++-----------------------------------
> >  1 file changed, 18 insertions(+), 41 deletions(-)
> > 
> > diff --git a/arch/mips/lib/cache.c b/arch/mips/lib/cache.c
> > index 2bb91c6..320335c 100644
> > --- a/arch/mips/lib/cache.c
> > +++ b/arch/mips/lib/cache.c
> > @@ -37,82 +37,59 @@ static inline unsigned long dcache_line_size(void)
> >  	return 2 << dl;
> >  }
> >  
> > +#define cache_loop(start, end, lsize, ops...) do {			\
> > +	const void *addr = (const void *)(start & ~(lsize - 1));	\
> > +	const void *aend = (const void *)((end - 1) & ~(lsize - 1));	\
> > +	const unsigned int cache_ops[] = { ops };			\
> 
> Can't you turn this into a function instead and pass some flags to
> select the ops instead ?

Hi Marek,

The problem is that then both that function & its callers would need to
know about the types of cache ops, and there'd need to be some mapping
from flags to the actual cache op values (of which there'll be a couple
more once L2 support is added). I think it's much simpler & cleaner to
just go with the macro in this case - it keeps knowledge of which cache
ops to use with the callers, and it ends up generating the same code as
before (ie. the inner loop gets unrolled to just the relevant cache
instructions).

Thanks,
    Paul

> 
> > +	unsigned int i;							\
> > +									\
> > +	for (; addr <= aend; addr += lsize) {				\
> > +		for (i = 0; i < ARRAY_SIZE(cache_ops); i++)		\
> > +			mips_cache(cache_ops[i], addr);			\
> > +	}								\
> > +} while (0)
> > +
> >  void flush_cache(ulong start_addr, ulong size)
> >  {
> >  	unsigned long ilsize = icache_line_size();
> >  	unsigned long dlsize = dcache_line_size();
> > -	const void *addr, *aend;
> >  
> >  	/* aend will be miscalculated when size is zero, so we return here */
> >  	if (size == 0)
> >  		return;
> >  
> > -	addr = (const void *)(start_addr & ~(dlsize - 1));
> > -	aend = (const void *)((start_addr + size - 1) & ~(dlsize - 1));
> > -
> >  	if (ilsize == dlsize) {
> >  		/* flush I-cache & D-cache simultaneously */
> > -		while (1) {
> > -			mips_cache(HIT_WRITEBACK_INV_D, addr);
> > -			mips_cache(HIT_INVALIDATE_I, addr);
> > -			if (addr == aend)
> > -				break;
> > -			addr += dlsize;
> > -		}
> > +		cache_loop(start_addr, start_addr + size, ilsize,
> > +			   HIT_WRITEBACK_INV_D, HIT_INVALIDATE_I);
> >  		return;
> >  	}
> >  
> >  	/* flush D-cache */
> > -	while (1) {
> > -		mips_cache(HIT_WRITEBACK_INV_D, addr);
> > -		if (addr == aend)
> > -			break;
> > -		addr += dlsize;
> > -	}
> > +	cache_loop(start_addr, start_addr + size, dlsize, HIT_WRITEBACK_INV_D);
> >  
> >  	/* flush I-cache */
> > -	addr = (const void *)(start_addr & ~(ilsize - 1));
> > -	aend = (const void *)((start_addr + size - 1) & ~(ilsize - 1));
> > -	while (1) {
> > -		mips_cache(HIT_INVALIDATE_I, addr);
> > -		if (addr == aend)
> > -			break;
> > -		addr += ilsize;
> > -	}
> > +	cache_loop(start_addr, start_addr + size, ilsize, HIT_INVALIDATE_I);
> >  }
> >  
> >  void flush_dcache_range(ulong start_addr, ulong stop)
> >  {
> >  	unsigned long lsize = dcache_line_size();
> > -	const void *addr = (const void *)(start_addr & ~(lsize - 1));
> > -	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
> >  
> >  	/* aend will be miscalculated when size is zero, so we return here */
> >  	if (start_addr == stop)
> >  		return;
> >  
> > -	while (1) {
> > -		mips_cache(HIT_WRITEBACK_INV_D, addr);
> > -		if (addr == aend)
> > -			break;
> > -		addr += lsize;
> > -	}
> > +	cache_loop(start_addr, stop, lsize, HIT_WRITEBACK_INV_D);
> >  }
> >  
> >  void invalidate_dcache_range(ulong start_addr, ulong stop)
> >  {
> >  	unsigned long lsize = dcache_line_size();
> > -	const void *addr = (const void *)(start_addr & ~(lsize - 1));
> > -	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
> >  
> >  	/* aend will be miscalculated when size is zero, so we return here */
> >  	if (start_addr == stop)
> >  		return;
> >  
> > -	while (1) {
> > -		mips_cache(HIT_INVALIDATE_D, addr);
> > -		if (addr == aend)
> > -			break;
> > -		addr += lsize;
> > -	}
> > +	cache_loop(start_addr, stop, lsize, HIT_INVALIDATE_I);
> >  }
> > 
> 
> 
> -- 
> Best regards,
> Marek Vasut
Marek Vasut May 27, 2016, 2:36 p.m. UTC | #3
On 05/27/2016 12:30 PM, Paul Burton wrote:
> On Thu, May 26, 2016 at 06:13:17PM +0200, Marek Vasut wrote:
>> On 05/26/2016 05:58 PM, Paul Burton wrote:
>>> The various cache maintenance routines perform a number of loops over
>>> cache lines. Rather than duplicate the code for performing such loops,
>>> abstract it out into a new cache_loop macro which performs an arbitrary
>>> number of cache ops on a range of addresses. This reduces duplication in
>>> the existing L1 cache maintenance code & will allow for not adding
>>> further duplication when introducing L2 cache support.
>>>
>>> Signed-off-by: Paul Burton <paul.burton@imgtec.com>
>>>
>>> ---
>>>
>>>  arch/mips/lib/cache.c | 59 ++++++++++++++++-----------------------------------
>>>  1 file changed, 18 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/arch/mips/lib/cache.c b/arch/mips/lib/cache.c
>>> index 2bb91c6..320335c 100644
>>> --- a/arch/mips/lib/cache.c
>>> +++ b/arch/mips/lib/cache.c
>>> @@ -37,82 +37,59 @@ static inline unsigned long dcache_line_size(void)
>>>  	return 2 << dl;
>>>  }
>>>  
>>> +#define cache_loop(start, end, lsize, ops...) do {			\
>>> +	const void *addr = (const void *)(start & ~(lsize - 1));	\
>>> +	const void *aend = (const void *)((end - 1) & ~(lsize - 1));	\
>>> +	const unsigned int cache_ops[] = { ops };			\
>>
>> Can't you turn this into a function instead and pass some flags to
>> select the ops instead ?
> 
> Hi Marek,

Hi!

> The problem is that then both that function & its callers would need to
> know about the types of cache ops, and there'd need to be some mapping
> from flags to the actual cache op values (of which there'll be a couple
> more once L2 support is added).

But the callers already know which ops they must invoke, right ?
You can just have enum {} for the bit definitions. It's all local
to this file, so I don't see a problem.

> I think it's much simpler & cleaner to
> just go with the macro in this case - it keeps knowledge of which cache
> ops to use with the callers, and it ends up generating the same code as
> before (ie. the inner loop gets unrolled to just the relevant cache
> instructions).

I would expect GCC to take care of inlining the code, but you can always
use the always_inline directive if that's your concern. Even cleaner way
might be to use CONFIG_CC_OPTIMIZE_LIBS_FOR_SPEED and compile the
cache file with -O2 .

> Thanks,
>     Paul
> 
>>
>>> +	unsigned int i;							\
>>> +									\
>>> +	for (; addr <= aend; addr += lsize) {				\
>>> +		for (i = 0; i < ARRAY_SIZE(cache_ops); i++)		\
>>> +			mips_cache(cache_ops[i], addr);			\
>>> +	}								\
>>> +} while (0)
>>> +
>>>  void flush_cache(ulong start_addr, ulong size)
>>>  {
>>>  	unsigned long ilsize = icache_line_size();
>>>  	unsigned long dlsize = dcache_line_size();
>>> -	const void *addr, *aend;
>>>  
>>>  	/* aend will be miscalculated when size is zero, so we return here */
>>>  	if (size == 0)
>>>  		return;
>>>  
>>> -	addr = (const void *)(start_addr & ~(dlsize - 1));
>>> -	aend = (const void *)((start_addr + size - 1) & ~(dlsize - 1));
>>> -
>>>  	if (ilsize == dlsize) {
>>>  		/* flush I-cache & D-cache simultaneously */
>>> -		while (1) {
>>> -			mips_cache(HIT_WRITEBACK_INV_D, addr);
>>> -			mips_cache(HIT_INVALIDATE_I, addr);
>>> -			if (addr == aend)
>>> -				break;
>>> -			addr += dlsize;
>>> -		}
>>> +		cache_loop(start_addr, start_addr + size, ilsize,
>>> +			   HIT_WRITEBACK_INV_D, HIT_INVALIDATE_I);
>>>  		return;
>>>  	}
>>>  
>>>  	/* flush D-cache */
>>> -	while (1) {
>>> -		mips_cache(HIT_WRITEBACK_INV_D, addr);
>>> -		if (addr == aend)
>>> -			break;
>>> -		addr += dlsize;
>>> -	}
>>> +	cache_loop(start_addr, start_addr + size, dlsize, HIT_WRITEBACK_INV_D);
>>>  
>>>  	/* flush I-cache */
>>> -	addr = (const void *)(start_addr & ~(ilsize - 1));
>>> -	aend = (const void *)((start_addr + size - 1) & ~(ilsize - 1));
>>> -	while (1) {
>>> -		mips_cache(HIT_INVALIDATE_I, addr);
>>> -		if (addr == aend)
>>> -			break;
>>> -		addr += ilsize;
>>> -	}
>>> +	cache_loop(start_addr, start_addr + size, ilsize, HIT_INVALIDATE_I);
>>>  }
>>>  
>>>  void flush_dcache_range(ulong start_addr, ulong stop)
>>>  {
>>>  	unsigned long lsize = dcache_line_size();
>>> -	const void *addr = (const void *)(start_addr & ~(lsize - 1));
>>> -	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
>>>  
>>>  	/* aend will be miscalculated when size is zero, so we return here */
>>>  	if (start_addr == stop)
>>>  		return;
>>>  
>>> -	while (1) {
>>> -		mips_cache(HIT_WRITEBACK_INV_D, addr);
>>> -		if (addr == aend)
>>> -			break;
>>> -		addr += lsize;
>>> -	}
>>> +	cache_loop(start_addr, stop, lsize, HIT_WRITEBACK_INV_D);
>>>  }
>>>  
>>>  void invalidate_dcache_range(ulong start_addr, ulong stop)
>>>  {
>>>  	unsigned long lsize = dcache_line_size();
>>> -	const void *addr = (const void *)(start_addr & ~(lsize - 1));
>>> -	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
>>>  
>>>  	/* aend will be miscalculated when size is zero, so we return here */
>>>  	if (start_addr == stop)
>>>  		return;
>>>  
>>> -	while (1) {
>>> -		mips_cache(HIT_INVALIDATE_D, addr);
>>> -		if (addr == aend)
>>> -			break;
>>> -		addr += lsize;
>>> -	}
>>> +	cache_loop(start_addr, stop, lsize, HIT_INVALIDATE_I);
>>>  }
>>>
>>
>>
>> -- 
>> Best regards,
>> Marek Vasut
Paul Burton May 27, 2016, 2:48 p.m. UTC | #4
On Fri, May 27, 2016 at 04:36:21PM +0200, Marek Vasut wrote:
> > The problem is that then both that function & its callers would need to
> > know about the types of cache ops, and there'd need to be some mapping
> > from flags to the actual cache op values (of which there'll be a couple
> > more once L2 support is added).
> 
> But the callers already know which ops they must invoke, right ?
> You can just have enum {} for the bit definitions. It's all local
> to this file, so I don't see a problem.

It's all local to the file, but it's still an extra level of abstraction
that I don't see the value of. It just means that both callers & callee
have to know about various cache ops, and I don't see the point in
duplicating that knowledge. I think it's much cleaner to keep that
knowledge in one place - the caller.

> > I think it's much simpler & cleaner to
> > just go with the macro in this case - it keeps knowledge of which cache
> > ops to use with the callers, and it ends up generating the same code as
> > before (ie. the inner loop gets unrolled to just the relevant cache
> > instructions).
> 
> I would expect GCC to take care of inlining the code, but you can always
> use the always_inline directive if that's your concern. Even cleaner way
> might be to use CONFIG_CC_OPTIMIZE_LIBS_FOR_SPEED and compile the
> cache file with -O2 .

It may well inline it, remove the flags indirection & generate the same
code - I haven't tried it to find out. Making the code less clear with
that abstraction, even if the compiler will come along & clean it back
up, doesn't make much sense to me as someone who has to read the code.

I understand the preference for functions over macros, but I think this
is a case where the macro has strengths that a function cannot provide
(ignoring perhaps varargs which would be even more messy).

Thanks,
    Paul
Marek Vasut May 28, 2016, 12:27 p.m. UTC | #5
On 05/27/2016 04:48 PM, Paul Burton wrote:
> On Fri, May 27, 2016 at 04:36:21PM +0200, Marek Vasut wrote:
>>> The problem is that then both that function & its callers would need to
>>> know about the types of cache ops, and there'd need to be some mapping
>>> from flags to the actual cache op values (of which there'll be a couple
>>> more once L2 support is added).
>>
>> But the callers already know which ops they must invoke, right ?
>> You can just have enum {} for the bit definitions. It's all local
>> to this file, so I don't see a problem.
> 
> It's all local to the file, but it's still an extra level of abstraction
> that I don't see the value of. It just means that both callers & callee
> have to know about various cache ops, and I don't see the point in
> duplicating that knowledge. I think it's much cleaner to keep that
> knowledge in one place - the caller.

OK, I see what you mean.

>>> I think it's much simpler & cleaner to
>>> just go with the macro in this case - it keeps knowledge of which cache
>>> ops to use with the callers, and it ends up generating the same code as
>>> before (ie. the inner loop gets unrolled to just the relevant cache
>>> instructions).
>>
>> I would expect GCC to take care of inlining the code, but you can always
>> use the always_inline directive if that's your concern. Even cleaner way
>> might be to use CONFIG_CC_OPTIMIZE_LIBS_FOR_SPEED and compile the
>> cache file with -O2 .
> 
> It may well inline it, remove the flags indirection & generate the same
> code - I haven't tried it to find out. Making the code less clear with
> that abstraction, even if the compiler will come along & clean it back
> up, doesn't make much sense to me as someone who has to read the code.

It makes sense when you debug the code with GDB though. It's much easier
to locate lines if you don't use huge variadic macros.

> I understand the preference for functions over macros, but I think this
> is a case where the macro has strengths that a function cannot provide
> (ignoring perhaps varargs which would be even more messy).

What would you say to turning this into a variadic function then ?
It would avoid having the same knowledge in both caller and callee and
it would not be a macro, so it's a win-win. What do you think ?

> Thanks,
>     Paul
>
diff mbox

Patch

diff --git a/arch/mips/lib/cache.c b/arch/mips/lib/cache.c
index 2bb91c6..320335c 100644
--- a/arch/mips/lib/cache.c
+++ b/arch/mips/lib/cache.c
@@ -37,82 +37,59 @@  static inline unsigned long dcache_line_size(void)
 	return 2 << dl;
 }
 
+#define cache_loop(start, end, lsize, ops...) do {			\
+	const void *addr = (const void *)(start & ~(lsize - 1));	\
+	const void *aend = (const void *)((end - 1) & ~(lsize - 1));	\
+	const unsigned int cache_ops[] = { ops };			\
+	unsigned int i;							\
+									\
+	for (; addr <= aend; addr += lsize) {				\
+		for (i = 0; i < ARRAY_SIZE(cache_ops); i++)		\
+			mips_cache(cache_ops[i], addr);			\
+	}								\
+} while (0)
+
 void flush_cache(ulong start_addr, ulong size)
 {
 	unsigned long ilsize = icache_line_size();
 	unsigned long dlsize = dcache_line_size();
-	const void *addr, *aend;
 
 	/* aend will be miscalculated when size is zero, so we return here */
 	if (size == 0)
 		return;
 
-	addr = (const void *)(start_addr & ~(dlsize - 1));
-	aend = (const void *)((start_addr + size - 1) & ~(dlsize - 1));
-
 	if (ilsize == dlsize) {
 		/* flush I-cache & D-cache simultaneously */
-		while (1) {
-			mips_cache(HIT_WRITEBACK_INV_D, addr);
-			mips_cache(HIT_INVALIDATE_I, addr);
-			if (addr == aend)
-				break;
-			addr += dlsize;
-		}
+		cache_loop(start_addr, start_addr + size, ilsize,
+			   HIT_WRITEBACK_INV_D, HIT_INVALIDATE_I);
 		return;
 	}
 
 	/* flush D-cache */
-	while (1) {
-		mips_cache(HIT_WRITEBACK_INV_D, addr);
-		if (addr == aend)
-			break;
-		addr += dlsize;
-	}
+	cache_loop(start_addr, start_addr + size, dlsize, HIT_WRITEBACK_INV_D);
 
 	/* flush I-cache */
-	addr = (const void *)(start_addr & ~(ilsize - 1));
-	aend = (const void *)((start_addr + size - 1) & ~(ilsize - 1));
-	while (1) {
-		mips_cache(HIT_INVALIDATE_I, addr);
-		if (addr == aend)
-			break;
-		addr += ilsize;
-	}
+	cache_loop(start_addr, start_addr + size, ilsize, HIT_INVALIDATE_I);
 }
 
 void flush_dcache_range(ulong start_addr, ulong stop)
 {
 	unsigned long lsize = dcache_line_size();
-	const void *addr = (const void *)(start_addr & ~(lsize - 1));
-	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
 
 	/* aend will be miscalculated when size is zero, so we return here */
 	if (start_addr == stop)
 		return;
 
-	while (1) {
-		mips_cache(HIT_WRITEBACK_INV_D, addr);
-		if (addr == aend)
-			break;
-		addr += lsize;
-	}
+	cache_loop(start_addr, stop, lsize, HIT_WRITEBACK_INV_D);
 }
 
 void invalidate_dcache_range(ulong start_addr, ulong stop)
 {
 	unsigned long lsize = dcache_line_size();
-	const void *addr = (const void *)(start_addr & ~(lsize - 1));
-	const void *aend = (const void *)((stop - 1) & ~(lsize - 1));
 
 	/* aend will be miscalculated when size is zero, so we return here */
 	if (start_addr == stop)
 		return;
 
-	while (1) {
-		mips_cache(HIT_INVALIDATE_D, addr);
-		if (addr == aend)
-			break;
-		addr += lsize;
-	}
+	cache_loop(start_addr, stop, lsize, HIT_INVALIDATE_I);
 }