[2/6] Andes nds32: machine description of nds32 porting (2).

Message ID 522CA258.2010403@gmail.com
State New

Commit Message

Chung-Ju Wu Sept. 8, 2013, 4:14 p.m. UTC
On 7/25/13 5:42 PM, Chung-Ju Wu wrote:
> On 7/24/13 11:50 PM, Chung-Ju Wu wrote:
> So we created another revised patch and here is a summary.
> The new modification is listed as item 3:
> 
>   1. Use error_at () for diagnostics statement and start with lowercase.
>   2. Some process can be done in nds32.opt.
>      Remove unnecessary parts from nds32-common.c file.
>   3. Use form-feeds (Control-L character) to separate logical sections.
> 

It has been a while since the last v2 patch.
I have created a new v3 patch to fix some typos and indentation.

Is it OK to apply on the trunk?


Best regards,
jasonwucj

Comments

Richard Sandiford Sept. 14, 2013, 11:40 a.m. UTC | #1
Some comments for part 2.

Chung-Ju Wu <jasonwucj@gmail.com> writes:
> +;; Include intrinsic functions definition.
> +(include "nds32.intrinsic.md")
> +
> +;; Include block move for nds32 multiple load/store behavior.
> +(include "nds32.multiple.md")
> +
> +;; Include DImode/DFmode operations.
> +(include "nds32.doubleword.md")
> +
> +;; Include peephole patterns.
> +(include "nds32.peephole2.md")

Usual gcc style is to use "-" rather than "." as a word separator in
filenames, e.g. "nds32-intrinsic.md" rather than "nds32.intrinsic.md".

> +(define_insn "*store_si"
> +  [(set (match_operand:SI 0 "memory_operand"   "=U45, U33, U37, U45, m")
> +	(match_operand:SI 1 "register_operand" "   l,   l,   l,   d, r"))]
> +  ""

Loads, stores, register moves and constant moves should normally be in
the same pattern, so that anything operating on constraints can see all
the alternatives at once.  This might not be as important for LRA as it
was for reload, but it still seems like good practice.

> +(define_insn "*mov<mode>"
> +  [(set (match_operand:QIHISI 0 "register_operand" "=r, m, r")
> +	(match_operand:QIHISI 1 "register_operand" " r, r, m"))]
> +  ""
> +{
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      if (get_attr_length (insn) == 2)
> +	return "mov55\t%0, %1";
> +      else
> +	return "ori\t%0, %1, 0";
> +    case 1:
> +      return nds32_output_32bit_store (operands, <byte>);
> +    case 2:
> +      return nds32_output_32bit_load (operands, <byte>);
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type" "alu,store,load")
> +   (set_attr "enabled" "1")
> +   (set_attr_alternative "length"
> +     [
> +       ;; Alternative 0
> +       (if_then_else (match_test "TARGET_16_BIT")
> +		     (const_int 2)
> +		     (const_int 4))
> +       ;; Alternative 1
> +       (const_int 4)
> +       ;; Alternative 2
> +       (const_int 4)
> +     ])])

The style used in the load and store patterns was:

(define_insn "*mov<mode>"
  [(set (match_operand:QIHISI 0 "register_operand" "=r, r, m, r")
	(match_operand:QIHISI 1 "register_operand" " r, r, r, m"))]
  ""
{
  switch (which_alternative)
    {
    case 0:
      return "mov55\t%0, %1";
    case 1:
      return "ori\t%0, %1, 0";
    case 2:
      return nds32_output_32bit_store (operands, <byte>);
    case 3:
      return nds32_output_32bit_load (operands, <byte>);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "alu,alu,store,load")
   (set_attr "length" "2,4,4,4")])

which seems neater.  Did you try that but find that it didn't work here?

Same comment for other instructions where:

       (if_then_else (match_test "TARGET_16_BIT")
		     (const_int 2)
		     (const_int 4))

occurs (except for the special case of relaxable branch instructions,
where using the if_then_else is good).

> +;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
> +;; are able to match such instruction template.
> +(define_insn "*move_addr"
> +  [(set (match_operand:SI 0 "register_operand"       "=l, r")
> +	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
> +  ""
> +  "la\t%0, %1"
> +  [(set_attr "type" "move")
> +   (set_attr "length"  "8")])
> +
> +
> +(define_insn "*sethi"
> +  [(set (match_operand:SI 0 "register_operand"           "=r")
> +	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
> +  ""
> +{
> +  return "sethi\t%0, hi20(%1)";
> +}
> +  [(set_attr "type" "alu")
> +   (set_attr "length" "4")])
> +
> +
> +(define_insn "*lo_sum"
> +  [(set (match_operand:SI 0 "register_operand"             "=r")
> +	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
> +		   (match_operand:SI 2 "immediate_operand" " i")))]
> +  ""
> +  "ori\t%0, %1, lo12(%2)"
> +  [(set_attr "type" "alu")
> +   (set_attr "length" "4")])

Very minor, but "nds32_symbolic_operand" seems like a better choice for
*sethi and *lo_sum too, since (high ...) and (lo_sum ...) shouldn't be
used for const_ints.

Any pass would be within its rights to fuse a *sethi and *lo_sum pair back
into a single *move_addr.  Is that something you want to allow?
(That's a genuine question rather than a review comment btw.)

Is the "0" constraint on the *lo_sum really necessary?  It looks from
the later OR patterns as though this form of ORI allows the source and
destination registers to be different.

> +;; Zero extension instructions.
> +
> +(define_expand "zero_extend<mode>si2"
> +  [(set (match_operand:SI 0 "general_operand" "")
> +	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
> +  ""
> +{
> +  rtx tmp_reg;
> +
> +  /* We need to make sure operands[1] is a register.  */
> +  if (!REG_P (operands[1]))
> +    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);

Why do you need this?  It looks like the architecture has zero-extending loads.

> +
> +  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
> +     we create two rtx patterns:
> +       (reg:SI K) <- (zero_extend:SI (reg Y))
> +       (mem:SI X) <- (reg:SI K)
> +     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
> +     and the second rtx will be matched by mov naming pattern.  */
> +  if (MEM_P (operands[0]))
> +    {
> +      tmp_reg = gen_reg_rtx (SImode);
> +
> +      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
> +      emit_insn (gen_movsi (operands[0], tmp_reg));
> +
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*zero_extend<mode>si2_reg"
> +  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
> +	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
> +  ""
> +{
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      return "ze<size>33\t%0, %1";
> +    case 1:
> +      return "ze<size>\t%0, %1";
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type"   "alu,alu")
> +   (set_attr "length" "  2,  4")])
> +
> +(define_insn "*zero_extend<mode>si2_load"
> +  [(set (match_operand:SI 0 "register_operand"                 "=  l, *r")
> +	(zero_extend:SI (match_operand:QIHI 1 "memory_operand" " U33,  m")))]
> +  ""
> +{
> +  if (which_alternative == 0)
> +    return nds32_output_16bit_load (operands, <byte>);
> +  else
> +    return nds32_output_32bit_load (operands, <byte>);
> +}
> +  [(set_attr "length" "2, 4")
> +   (set_attr "type" "load,load")])

Here too it's better to have a single pattern with both the register
and memory alternatives.  It ought to be possible to define
"zero_extend<mode>si2" directly as a define_insn rather than a
define_expand:

(define_insn "zero_extend<mode>si2"
  [(set (match_operand:SI 0 "register_operand" "w,w,r,*r")
	(zero_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" "w,r,U33,m")))]
  ...

The target-independent code will then handle memory destinations.

Same comments for "extend<mode>si2".

> +;; Arithmetic instructions.
> +
> +(define_expand "addsi3"
> +  [(set (match_operand:SI 0 "register_operand" "")
> +	(plus:SI (match_operand:SI 1 "register_operand" "")
> +		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
> +  ""
> +{
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
> +})

This looks like it's papering over a bug elsewhere.  Any CONST_INT passed
into gen_addsi3 must already be correct for SImode.  If you find callers
where that isn't true, we need to fix them.  Also, any incorrect constants
are usually filtered out by the predicate.

> +(define_insn "*add<mode>3"
> +  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
> +	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
> +		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
> +  ""
> +  "@
> +  addi45\t%0, %2
> +  addi333\t%0, %1, %2
> +  add45\t%0, %2
> +  add333\t%0, %1, %2
> +  addi10.sp\t%2
> +  addri36.sp\t%0, %2
> +  addi\t%0, %1, %2
> +  add\t%0, %1, %2"
> +  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
> +   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])

The predicates in the define_expand and define_insn are different.
They should usually be the same.

Without the gen_int_mode, this too could be defined directly as
a define_insn, without a separate define_expand.
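
I.e. something like this (just a sketch of the shape, keeping the
existing alternatives):

(define_insn "add<mode>3"
  [(set (match_operand:QIHISI 0 "register_operand" "...")
	(plus:QIHISI (match_operand:QIHISI 1 "register_operand" "...")
		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" "...")))]
  ...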

> +(define_expand "subsi3"
> +  [(set (match_operand:SI 0 "register_operand" "")
> +	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
> +		 (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
> +  ""
> +  ""
> +)

Operand 2 shouldn't allow immediates.  They should all go via the
add optab instead: e.g. "x - 4" is canonicalized as (plus x (const_int -4)).

> +
> +(define_insn "*sub<mode>3"
> +  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
> +	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
> +		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
> +  ""
> +  "@
> +  subi45\t%0, %2
> +  subi333\t%0, %1, %2
> +  sub45\t%0, %2
> +  sub333\t%0, %1, %2
> +  subri\t%0, %2, %1
> +  sub\t%0, %1, %2"
> +  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
> +   (set_attr "length" "  2,  2,  2,  2,  4,  4")])

Here too a direct define_insn seems better than a define_expand/define_insn
pair.

> +(define_expand "andsi3"
> +  [(set (match_operand:SI         0 "register_operand" "")
> +	(and:SI (match_operand:SI 1 "register_operand" "")
> +		(match_operand:SI 2 "general_operand"  "")))]
> +  ""
> +{
> +  /* If operands[2] is const_int,
> +     we might be able to use other more efficient instructions.  */
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    {
> +      int mask = INTVAL (operands[2]);
> +
> +      if (mask == 255)
> +	{
> +	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
> +	  operands[1] = convert_to_mode (QImode, operands[1], 1);
> +	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
> +	  DONE;
> +	}
> +      else if (mask == 65535)
> +	{
> +	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
> +	  operands[1] = convert_to_mode (HImode, operands[1], 1);
> +	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
> +	  DONE;
> +	}
> +    }
> +})

It looks like the associated "*andsi3" insn also has a case for zeb.
That's usually the better approach.  Please consider adding a zeh case
to the "*andsi3" alternatives too and removing the code above.

With that change, the define_expand and define_insn could be fused.
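
For the zeh case that could look something like the following, assuming
a new constraint ("Izeh" is a made-up name here) that matches exactly
0xffff, next to the existing Izeb alternative:

	(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Izeh, ...")
  ...
    case 3:
      /* (and X 0xffff) can use the 16-bit zeh33 instruction.  */
      return "zeh33\t%0, %1";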

> +;; For iorsi3 naming pattern, we have to use define_expand first,
> +;; and then design different anonymous patterns so that it can
> +;; simply set different instruction length according to ISA.
> +(define_expand "iorsi3"
> +  [(set (match_operand:SI 0 "register_operand"         "")
> +	(ior:SI (match_operand:SI 1 "register_operand" "")
> +		(match_operand:SI 2 "general_operand"  "")))]
> +  ""
> +  ""
> +)
> +
> +;; This is the iorsi3 pattern for V3/V3M ISA,
> +;; which DOES HAVE 'or33' instruction.
> +;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
> +(define_insn "*iorsi3"
> +  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
> +	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
> +		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
> +  ""
> +{
> +  int one_position;
> +
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      return "or33\t%0, %2";
> +    case 1:
> +      return "or\t%0, %1, %2";
> +    case 2:
> +      return "ori\t%0, %1, %2";
> +    case 3:
> +      /* If we reach this alternative,
> +         it must pass the nds32_can_use_bset_p() test,
> +         so that we can guarantee there is only one 1-bit
> +         within the immediate value.  */
> +      for (one_position = 31; one_position >= 0; one_position--)
> +	{
> +	  if ((INTVAL (operands[2]) & (1 << one_position)) != 0)
> +	    {
> +	      /* Found the 1-bit position.  */
> +	      operands[2] = GEN_INT (one_position);
> +	      break;
> +	    }
> +	}
> +      return "bset\t%0, %1, %2";
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type"   "alu,alu,alu,alu")
> +   (set_attr "length" "  2,  4,  4,  4")])

I don't understand the comment above the define_expand, sorry.
This too looks like a case where "iorsi3" should just be a define_insn,
with no define_expand.

Case 3 could use exact_log2.
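
That is, roughly:

    case 3:
      /* nds32_can_use_bset_p guarantees a single 1-bit,
	 so exact_log2 cannot return -1 here.  */
      operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
      return "bset\t%0, %1, %2";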

Same comments for xor.  (You might be able to use code iterators
and have a single set of patterns for or and xor, including the
shifting variants.)
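
E.g. something along these lines (the iterator and attribute names are
made up):

(define_code_iterator any_or_xor [ior xor])
(define_code_attr logic [(ior "or") (xor "xor")])

(define_insn "<logic>si3"
  [(set (match_operand:SI 0 "register_operand"                "...")
	(any_or_xor:SI (match_operand:SI 1 "register_operand" "...")
		       (match_operand:SI 2 "general_operand"  "...")))]
  ...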

> +;; For negsi2 naming pattern, we have to use define_expand first,
> +;; and then design different anonymous patterns so that it can
> +;; output assembly code according to ISA.
> +(define_expand "negsi2"
> +  [(set (match_operand:SI 0 "register_operand"         "")
> +	(neg:SI (match_operand:SI 1 "register_operand" "")))]
> +  ""
> +  ""
> +)
> +
> +;; Note that there is NO 'neg33' instruction for V2 ISA.
> +;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
> +;; is the only option for V2 ISA.
> +(define_insn "*negsi2"
> +  [(set (match_operand:SI 0 "register_operand"         "=w, r")
> +	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
> +  ""
> +  "@
> +   neg33\t%0, %1
> +   subri\t%0, %1, 0"
> +  [(set_attr "type"   "alu,alu")
> +   (set_attr "length" "  2,  4")])

Here too the define_expand seems redundant.  Same for one_cmplsi2.

(Looks like you already define the shift instructions directly though,
thanks.)

> +;; Shift instructions.
> +
> +(define_insn "ashlsi3"
> +  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
> +	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
> +		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]

Operand 2 doesn't allow memory, so nonmemory_operand would be better
than general_operand.  Both are correct, but nonmemory_operand is
tighter and so forces the pre-RA optimisers to treat the load as
a separate instruction.

Same for the other shift instructions.

> +(define_expand "mov<mode>cc"
> +  [(set (match_operand:QIHI 0 "register_operand" "")
> +	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
> +			   (match_operand:QIHI 2 "register_operand" "")
> +			   (match_operand:QIHI 3 "register_operand" "")))]
> +  "TARGET_CMOV"
> +{
> +  rtx insn;
> +
> +  /* For QImode and HImode conditional move,
> +     make them to be SImode behavior.  */
> +  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
> +  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
> +  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
> +
> +  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
> +
> +  if (!insn)
> +    FAIL;
> +
> +  emit_insn (insn);
> +  DONE;
> +})

It'd be better to handle QI, HI and SI using a single template if possible.
Subregs are harder to optimise than plain registers.
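
One possible shape (only a sketch) is to give the cmov patterns a
QIHISI mode iterator and let "mov<mode>cc" expand to them directly:

(define_insn "cmovz<mode>"
  [(set (match_operand:QIHISI 0 "register_operand"                      "=r, r")
	(if_then_else:QIHISI (eq (match_operand:SI 1 "register_operand" " r, r")
				 (const_int 0))
			     (match_operand:QIHISI 2 "register_operand" " r, 0")
			     (match_operand:QIHISI 3 "register_operand" " 0, r")))]
  ...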

> +
> +(define_insn "cmovz"
> +  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
> +        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
> +			     (const_int 0))
> +			 (match_operand:SI 2 "register_operand"     " r, 0")
> +			 (match_operand:SI 3 "register_operand"     " 0, r")))]
> +  "TARGET_CMOV"
> +  "@
> +   cmovz\t%0, %2, %1
> +   cmovz\t%0, %3, %1"
> +  [(set_attr "type" "move")
> +   (set_attr "length"  "4")])
> +
> +(define_insn "cmovn"
> +  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
> +	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
> +			     (const_int 0))
> +			 (match_operand:SI 2 "register_operand"     " r, 0")
> +			 (match_operand:SI 3 "register_operand"     " 0, r")))]
> +  "TARGET_CMOV"
> +  "@
> +   cmovn\t%0, %2, %1
> +   cmovn\t%0, %3, %1"
> +  [(set_attr "type" "move")
> +   (set_attr "length"  "4")])
> +
> +(define_insn_and_split "*movsicc"
> +  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
> +	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
> +			   [(match_operand:SI 2 "register_operand" " r, r")
> +			    (const_int 0)])
> +			 (match_operand:SI 3 "register_operand"    " 0, r")
> +			 (match_operand:SI 4 "register_operand"    " r, 0")))]
> +  "TARGET_CMOV"
> +  "#"
> +  "reload_completed"
> +  [(pc)]
> +{
> +  enum rtx_code code = GET_CODE (operands[1]);
> +  rtx then_op = operands[3];
> +  rtx else_op = operands[4];
> +  rtx tmp;
> +
> +  /* According to the implementation in "movsicc" naming pattern,
> +     if we make transformation in which the comparison code is EQ,
> +     the desired target is at "else" part position semantically.
> +     Now it is the time (after reload_completed) to physically
> +     swap it to "then" part position.  */
> +  if (code == EQ)
> +    {
> +      tmp     = then_op;
> +      then_op = else_op;
> +      else_op = tmp;
> +    }
> +
> +  /* Choosing cmovz or cmovn is based on reload phase result.
> +     After reload phase, one source operand will use
> +     the same register as result operand.
> +     We can use cmovz/cmovn to catch the other source operand
> +     which has different register.
> +     So we check register number to determine using cmovz or cmovn.  */
> +  if (REGNO(then_op) == REGNO(operands[0]))
> +    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]));
> +  else if (REGNO(else_op) == REGNO(operands[0]))
> +    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
> +  else
> +    gcc_unreachable ();
> +
> +  DONE;
> +})

I don't really see off-hand how the third instruction would match in its
define_insn form, since the earlier instructions ought to match first.
And it looks at first glance like the splitter is working around
a bug in the first two instructions.  E.g. shouldn't the first pattern be:

(define_insn "cmovz"
  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
			     (const_int 0))
			 (match_operand:SI 2 "register_operand"     " r, 0")
			 (match_operand:SI 3 "register_operand"     " 0, r")))]
  "TARGET_CMOV"
  "@
   cmovz\t%0, %2, %1
   cmovn\t%0, %3, %1"
  [(set_attr "type" "move")
   (set_attr "length"  "4")])

with the second alternative being "cmovn" rather than "cmovz"?
With a similar change to the "cmovn" pattern, the define_insn_and_split
ought to be unnecessary.

> +	  /* We want to add 1 to the integer value
> +	     of operands[2] to create the 'slt' instruction.
> +	     This calculation is performed on the host machine,
> +	     which may use 64-bit integers.
> +	     So the meaning of the calculation result may be
> +	     different from the 32-bit nds32 target.
> +
> +	     For example:
> +	       0x7fffffff + 0x1 -> 0x80000000,
> +	       this value is POSITIVE on 64-bit machine,
> +	       but the expected value on 32-bit nds32 target
> +	       should be NEGATIVE value.
> +
> +	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
> +	     explicitly create SImode constant rtx.  */
> +	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);

The comment seems unnecessary.  gen_int_mode is better than GEN_INT
wherever it can be used.  It's new uses of GEN_INT that deserve
comments :-)

> +	  /* We want to add 1 to the integer value
> +	     of operands[2] to create the 'slt' instruction.
> +	     This calculation is performed on the host machine,
> +	     which may use 64-bit integers.
> +	     So the meaning of the calculation result may be
> +	     different from the 32-bit nds32 target.
> +
> +	     For example:
> +	       0x7fffffff + 0x1 -> 0x80000000,
> +	       this value is POSITIVE on 64-bit machine,
> +	       but the expected value on 32-bit nds32 target
> +	       should be NEGATIVE value.
> +
> +	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
> +	     explicitly create SImode constant rtx.  */
> +	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
> +
> +	  if (code == LE)
> +	    {
> +	      /* LE, use slts instruction */
> +	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
> +	    }
> +	  else
> +	    {
> +	      /* LEU, use slt instruction */
> +	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
> +	    }

Same here, but (le:SI X INT_MAX) isn't the same as (lt:SI X INT_MIN).
I'm not sure we're guaranteed to have optimised away all those cases
by this point, but at least an assert would be good.
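
E.g. something like this (a sketch; note that an SImode 0xffffffff is
represented as -1 in the host HOST_WIDE_INT):

  /* "x <= INT_MAX" and "x <=u UINT_MAX" are always true and should
     have been folded away before this point; adding 1 would wrap.  */
  gcc_assert (INTVAL (operands[2]) != (code == LE ? 0x7fffffff : -1));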

> +;; Subroutine call instruction returning no value.
> +;;   operands[0]: It should be a mem RTX whose address is
> +;;                the the address of the function.
> +;;                the address of the function.
> +;;   operands[2]: It is the number of registers used as operands.
> +
> +(define_expand "call"
> +  [(parallel [(call (match_operand 0 "memory_operand" "")
> +		    (match_operand 1 "general_operand" ""))

Might as well just use (match_operand 1 "" ""), like the define_insns do.

FWIW you can drop trailing ""s from match_operands.

> +(define_expand "prologue" [(const_int 0)]
> +  ""
> +{
> +  /* Note that only under V3/V3M ISA, we could use v3push prologue.  */
> +  if (TARGET_V3PUSH)
> +    nds32_expand_prologue_v3push();
> +  else
> +    nds32_expand_prologue();
> +  DONE;
> +})

Space before "()".  Same for the epilogue pattern.

> +;; nop instruction.
> +
> +(define_insn "nop"
> +  [(const_int 0)]
> +  ""
> +  "nop16"
> +  [(set_attr "type"   "misc")
> +   (set_attr "length"    "2")])

What happens for !TARGET_16_BIT?  Won't the alternative be disabled there?
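
If the 32-bit form is spelled "nop" (an assumption on my part), one way
to keep the pattern usable there would be:

(define_insn "nop"
  [(const_int 0)]
  ""
{
  return TARGET_16_BIT ? "nop16" : "nop";
}
  [(set_attr "type" "misc")
   (set (attr "length")
	(if_then_else (match_test "TARGET_16_BIT")
		      (const_int 2)
		      (const_int 4)))])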

> +  /* The En4 encoding string of the instruction is
> +     in the bitwise fashion of following: "fp gp lp sp".  */
> +  const char *En4_str[16] =

The optimisers would probably pick this up anyway, but:

  static const char *const En4_str[16] =

explicitly says that this is an array that should live in read-only storage.

> +  /* Create RbRe_str string.
> +     Note that we need to output ',' character if there exists En4 field.  */
> +  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
> +      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
> +  else
> +      RbRe_str = "";

Excess indentation.  GNU style is not to wrap "INTVAL (operands[2]) != 0"
in brackets.

Same comments for pop.

Overall it looks good to me FWIW.

Thanks,
Richard

Patch

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..f82f725
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,117 @@ 
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512.  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+
+/* Run-time Target Specification.  */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+
+/* Defining the Output Assembler Language.  */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..869726e
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2835 @@ 
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32.intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32.multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32.doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32.peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already checks TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should
+;; always be 1 to avoid conflicting with the settings here.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+})
+
+(define_insn "*store_si"
+  [(set (match_operand:SI 0 "memory_operand"   "=U45, U33, U37, U45, m")
+	(match_operand:SI 1 "register_operand" "   l,   l,   l,   d, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_store (operands, 4);
+
+    default:
+      return nds32_output_32bit_store (operands, 4);
+    }
+}
+  [(set_attr "type"   "store,store,store,store,store")
+   (set_attr "length" "    2,    2,    2,    2,    4")])
+
+(define_insn "*store_<mode>"
+  [(set (match_operand:QIHI 0 "memory_operand"   "=U33, m")
+	(match_operand:QIHI 1 "register_operand" "   l, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_store (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_store (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "store,store")
+   (set_attr "length" "    2,    4")])
+
+(define_insn "*load_si"
+  [(set (match_operand:SI 0 "register_operand" "=  l,   l,   l,   d, r")
+	(match_operand:SI 1 "memory_operand"   " U45, U33, U37, U45, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_load (operands, 4);
+
+    default:
+      return nds32_output_32bit_load (operands, 4);
+    }
+}
+  [(set_attr "type"   "load,load,load,load,load")
+   (set_attr "length" "   2,   2,   2,   2,   4")])
+
+(define_insn "*load_<mode>"
+  [(set (match_operand:QIHI 0 "register_operand" "=  l, r")
+	(match_operand:QIHI 1 "memory_operand"   " U33, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_load (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_load (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "load,load")
+   (set_attr "length" "   2,   4")])
+
+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 0 "register_operand" "=r, m, r")
+	(match_operand:QIHISI 1 "register_operand" " r, r, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "mov55\t%0, %1";
+      else
+	return "ori\t%0, %1, 0";
+    case 1:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 2:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "alu,store,load")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (const_int 4)
+       ;; Alternative 2
+       (const_int 4)
+     ])])
+
+;; For QImode and HImode, the immediate value can fit in imm20s.
+;; So there is no need to support QI and HI in the split patterns.
+;; Also, we use const_int_operand to limit that only CONST_INT
+;; is able to match such an instruction template.
+;;
+;; Besides, in the split condition, we ask for the big-constant split
+;; to be performed after the reload phase, so that the mov2add
+;; optimization in postreload has a chance to optimize the code.
+
+(define_insn_and_split "*movsi_const"
+  [(set (match_operand:QIHISI 0 "register_operand"  "=   d,    r,    r,    r,    t,    r")
+	(match_operand:QIHISI 1 "const_int_operand" " Ip05, Is05, Is20, Ihig, Ispl, Ispl"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "movpi45\t%0, %1";
+      /* else fall through.  */
+    case 1:
+      if (get_attr_length (insn) == 2)
+	return "movi55\t%0, %1";
+      /* else fall through.  */
+    case 2:
+      return "movi\t%0, %1";
+    case 3:
+      return "sethi\t%0, hi20(%1)";
+    case 4:
+      /* Use $r15; if the value is NOT in the range of Is20,
+         we must output "sethi + ori" directly since
+         we may have already passed the split stage.  */
+      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
+
+    default:
+      return "#";
+    }
+}
+  "reload_completed
+   && satisfies_constraint_Ispl (operands[1])
+   && !satisfies_constraint_Is20 (operands[1])"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0) (plus:QIHISI (match_dup 0) (match_dup 2)))]
+{
+  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xfff);
+  operands[1] = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+}
+  [(set_attr "type" "alu,alu,alu,alu,alu,alu")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 4)
+       ;; Alternative 4
+       (const_int 8)
+       ;; Alternative 5
+       (const_int 8)
+     ])])
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"           "=r")
+	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
+  ""
+{
+  return "sethi\t%0, hi20(%1)";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
+		   (match_operand:SI 2 "immediate_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+(define_expand "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (zero_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+(define_insn "*zero_extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "*zero_extend<mode>si2_load"
+  [(set (match_operand:SI 0 "register_operand"                 "=  l, *r")
+	(zero_extend:SI (match_operand:QIHI 1 "memory_operand" " U33,  m")))]
+  ""
+{
+  if (which_alternative == 0)
+    return nds32_output_16bit_load (operands, <byte>);
+  else
+    return nds32_output_32bit_load (operands, <byte>);
+}
+  [(set_attr "length" "2, 4")
+   (set_attr "type" "load,load")])
+
+;; Sign extension instructions.
+
+(define_expand "extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(sign_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (sign_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (sign_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+(define_insn "*extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(sign_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "*extend<mode>si2_load"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(sign_extend:SI (match_operand:QIHI 1 "memory_operand" " m")))]
+  ""
+{
+  rtx mem_addr_op;
+  rtx op0;
+  rtx op1;
+
+  /* Retrieve rtx X from (mem (X ...)).  */
+  mem_addr_op = XEXP (operands[1], 0);
+
+  switch (GET_CODE (mem_addr_op))
+    {
+    case SYMBOL_REF:
+    case CONST:
+      /* (mem (symbol_ref X))
+         (mem (const (...)))
+         => access global variables,
+            use "lbsi.gp / lhsi.gp" */
+      return "l<size>si.gp\t%0, %1";
+
+    case REG:
+      /* (mem (reg X))
+         => access location by using register,
+            use "lbsi / lhsi" */
+      return "l<size>si\t%0, %1";
+
+    case PLUS:
+      /* get operands first */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (plus reg reg))
+         => access location by adding two registers,
+            use "lbs / lhs" */
+      if (REG_P (op0) && REG_P (op1))
+	return "l<size>s\t%0, %1";
+
+      /* (mem (plus reg const_int))
+         => access location by adding one register with const_int,
+            use "lbsi / lhsi" */
+      if (REG_P (op0) && CONST_INT_P (op1))
+	return "l<size>si\t%0, %1";
+
+      /* (mem (plus (mult reg const_int) reg))
+         => access location by adding one register with
+            multiplication of register and const_int,
+            use "lbs / lhs" */
+      if (GET_CODE (op0) == MULT && REG_P (op1)
+	  &&       REG_P (XEXP (op0, 0))
+	  && CONST_INT_P (XEXP (op0, 1)))
+	return "l<size>s\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_MODIFY:
+      /* Get operands first.  */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (reg))))
+         => access location by using register which will be
+            post modified with reg,
+            use "lbs.bi/ lhs.bi / lws.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && REG_P (XEXP (op1, 1)))
+	return "l<size>s.bi\t%0, %1";
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (const_int))))
+         => access location by using register which will be
+            post modified with const_int,
+            use "lbsi.bi/ lhsi.bi / lwsi.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && CONST_INT_P (XEXP (op1, 1)))
+	return "l<size>si.bi\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_INC:
+      /* (mem (post_inc reg))
+         => access location by using register which will be
+            post increment,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, <byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_DEC:
+      /* (mem (post_dec reg))
+         => access location by using register which will be
+            post decrement,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, -<byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case LO_SUM:
+      operands[2] = XEXP (mem_addr_op, 1);
+      operands[1] = XEXP (mem_addr_op, 0);
+      return "l<size>si\t%0, [%1 + lo12(%2)]";
+
+    default:
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+    }
+
+other_cases:
+  gcc_unreachable ();
+}
+  [(set_attr "type" "load")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(plus:SI (match_operand:SI 1 "register_operand" "")
+		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
+  ""
+{
+  if (GET_CODE (operands[2]) == CONST_INT)
+    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
+})
+
+(define_insn "*add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+  "@
+  addi45\t%0, %2
+  addi333\t%0, %1, %2
+  add45\t%0, %2
+  add333\t%0, %1, %2
+  addi10.sp\t%2
+  addri36.sp\t%0, %2
+  addi\t%0, %1, %2
+  add\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])
+
+(define_expand "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
+		 (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
+  ""
+  ""
+)
+
+(define_insn "*sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
+  ""
+  "@
+  subi45\t%0, %2
+  subi333\t%0, %1, %2
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure the multiplier is an exact power of 2.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure the multiplier is an exact power of 2.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mul33\t%0, %2";
+    case 1:
+      return "mul\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32.doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+(define_expand "andsi3"
+  [(set (match_operand:SI         0 "register_operand" "")
+	(and:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+{
+  /* If operands[2] is const_int,
+     we might be able to use other more efficient instructions.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      int mask = INTVAL (operands[2]);
+
+      if (mask == 255)
+	{
+	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
+	  operands[1] = convert_to_mode (QImode, operands[1], 1);
+	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+      else if (mask == 65535)
+	{
+	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
+	  operands[1] = convert_to_mode (HImode, operands[1], 1);
+	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+    }
+})
+
+(define_insn "*andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    0,    0,    r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Ixls, Ix11, Ibms, Ifex, Iu15, Ii15, Ic15")))]
+  ""
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff  -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0x01  -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "xlsb33\t%0, %1";
+    case 4:
+      return "x11b33\t%0, %1";
+    case 5:
+      operands[2] = GEN_INT(floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 6:
+      operands[2] = GEN_INT(floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 7:
+      return "andi\t%0, %1, %2";
+    case 8:
+      operands[2] = GEN_INT(~mask);
+      return "bitci\t%0, %1, %2";
+    case 9:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  4,  4,  4")])
+
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For iorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+(define_expand "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(ior:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; This is the iorsi3 pattern for V3/V3M ISA,
+;; which DOES HAVE 'or33' instruction.
+;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
+(define_insn "*iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         the immediate must have passed the nds32_can_use_bset_p() test,
+         which guarantees there is exactly one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For the xorsi3 named pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that we can
+;; simply set different instruction lengths according to the ISA.
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(xor:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+(define_insn "*xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         the immediate must have passed the nds32_can_use_btgl_p() test,
+         which guarantees there is exactly one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  if ((INTVAL (operands[2]) & (1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For the negsi2 named pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that we can
+;; output assembly code according to the ISA.
+(define_expand "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(neg:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; Note that there is NO 'neg33' instruction for the V2 ISA.
+;; So 'subri A,B,0' (whose semantics are 'A = 0 - B')
+;; is the only option for the V2 ISA.
+(define_insn "*negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' operation
+;; ----------------------------------------------------------------------------
+
+;; For the one_cmplsi2 named pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that we can
+;; output assembly code according to the ISA.
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(not:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; This is the one_cmplsi2 pattern.
+(define_insn "*one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
+		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "slli333\t%0, %1, %2";
+    case 1:
+      return "slli\t%0, %1, %2";
+    case 2:
+      return "sll\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srai45\t%0, %2";
+    case 1:
+      return "srai\t%0, %1, %2";
+    case 2:
+      return "sra\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "srli45\t%0, %2";
+    case 1:
+      return "srli\t%0, %1, %2";
+    case 2:
+      return "srl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we let gcc generate the original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since the Andes ISA only has the 'slt' (Set on Less Than)
+         instruction for comparison, the major strategy we use here is
+         to convert the conditional move into an 'LT + EQ' or 'LT + NE'
+         rtx combination.  We design the constraints properly so that
+         the reload phase will help make one source operand use the
+         same register as the result operand.  Then we can use
+         cmovz/cmovn to catch the other source operand,
+         which has a different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target the "then" part.
+         Strategy : Reverse the condition and swap the comparison operands.
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P),
+	     so fall through to reverse the condition.  */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* No need to reverse the condition.  */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For the '>' comparison operator, we swap the operands
+         so that we can have the 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we don't have direct EQ and NE
+         comparison instructions.  If we don't split them, the
+         conditional move transformation will fail when producing
+         (SET A (EQ B C)) or (SET A (NE B C)).  */
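+      /* An explanatory note on the trick below: slt_compare performs an
+         unsigned comparison, so "(x ^ y) <u 1" is nonzero exactly when
+         x == y, and "0 <u (x ^ y)" is nonzero exactly when x != y.
+         Hence one xor plus one slt materializes the equality test.  */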
+      if (new_code == EQ)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+	}
+      else
+	/* This emit_insn will create the corresponding
+	   'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change the comparison semantics into (eq X 0) or (ne X 0)
+         behavior so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         The reason we do not physically change their rtx positions is
+         that gcc will also do optimization by reversing conditions,
+         which may break our transformation semantics
+         if we physically changed the rtx right now.
+         So we just pick up the corresponding comparison operator
+         based on the reverse status, leaving the "swap position" job
+         to after the reload phase by using the define_insn_and_split
+         strategy.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while (0); /* dummy line */
+})
+
+(define_expand "mov<mode>cc"
+  [(set (match_operand:QIHI 0 "register_operand" "")
+	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
+			   (match_operand:QIHI 2 "register_operand" "")
+			   (match_operand:QIHI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  rtx insn;
+
+  /* For QImode and HImode conditional moves,
+     make them use SImode behavior.  */
+  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
+  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
+
+  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
+
+  if (!insn)
+    FAIL;
+
+  emit_insn (insn);
+  DONE;
+})
+
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+(define_insn_and_split "*movsicc"
+  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
+	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
+			   [(match_operand:SI 2 "register_operand" " r, r")
+			    (const_int 0)])
+			 (match_operand:SI 3 "register_operand"    " 0, r")
+			 (match_operand:SI 4 "register_operand"    " r, 0")))]
+  "TARGET_CMOV"
+  "#"
+  "reload_completed"
+  [(pc)]
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx then_op = operands[3];
+  rtx else_op = operands[4];
+  rtx tmp;
+
+  /* According to the implementation of the "movsicc" named pattern,
+     if we made a transformation in which the comparison code is EQ,
+     the desired target is semantically at the "else" position.
+     Now it is time (after reload_completed) to physically
+     swap it into the "then" position.  */
+  if (code == EQ)
+    {
+      tmp     = then_op;
+      then_op = else_op;
+      else_op = tmp;
+    }
+
+  /* Choosing cmovz or cmovn is based on the reload phase result.
+     After the reload phase, one source operand will use
+     the same register as the result operand.
+     We can use cmovz/cmovn to catch the other source operand,
+     which has a different register.
+     So we check the register numbers to determine
+     whether to use cmovz or cmovn.  */
+  if (REGNO (then_op) == REGNO (operands[0]))
+    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]));
+  else if (REGNO (else_op) == REGNO (operands[0]))
+    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use the beqz, bnez, bgtz, bgez, bltz, or blez instructions.
+     So we let gcc generate the original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparisons, the NDS32 ISA only has slt
+     (Set-on-Less-Than) behavior, so we need to generate other
+     rtx patterns to achieve the same semantics.  */
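+  /* For illustration only (with hypothetical registers): a signed
+     "a > b" branch with both operands in registers falls into the
+     GT case below and is expanded roughly as
+         slts  $ta, b, a      (set $ta to (b < a))
+         bnez  $ta, label     (branch when $ta is nonzero)
+     via the recursive gen_cbranchsi4 call with code NE.  */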
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to add 1 to the integer value
+	     of operands[2] to create the 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may use 64-bit integers.
+	     So the meaning of the calculation result may be
+	     different from that on the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on a 64-bit machine,
+	       but the expected value on the 32-bit nds32 target
+	       should be a NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to add 1 to the integer value
+	     of operands[2] to create the 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may use 64-bit integers.
+	     So the meaning of the calculation result may be
+	     different from that on the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on a 64-bit machine,
+	       but the expected value on the 32-bit nds32 target
+	       should be a NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* The NDS32 ISA has various forms for eq/ne behavior
+         no matter what kind of operand is involved.
+         So just generate the original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while (0); /* dummy line */
+})
+
+
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For the 32-bit case,
+     we assume it is always reachable (but still check the range
+     -65500 ~ 65500).
+
+     For the 16-bit case,
+     it must satisfy the { 255 >= (label - pc) >= -256 } condition.
+     However, since the nds32 $pc points at the beginning of the
+     instruction, we should leave some length space for the current insn.
+     So we use the range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* Alternative 2 should never reach here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	       *  =>
+	       *    bnezs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	       *  =>
+	       *    beqzs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	       *  =>
+	       *    bnez38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	       *  =>
+	       *    beqz38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* Alternative 2 should never reach here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	   *  =>
+	   *    bnez  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	   *  =>
+	   *    beqz  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instructions.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V2 || TARGET_ISA_V3M"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For the 32-bit case,
+     we assume it is always reachable (but still check the range
+     -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	   *  =>
+	   *    bne  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	   *  =>
+	   *    beq  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3,
+;; because V3 DOES HAVE the beqc/bnec instructions.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has two forms:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For the 32-bit case, we assume it is always reachable
+     (but still check the ranges -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	       *  =>
+	       *    bne  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	       *  =>
+	       *    bnec  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	       *  =>
+	       *    beq  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	       *  =>
+	       *    beqc  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For the 32-bit case, we assume it is always reachable
+     (but still check the range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far away to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse the condition and use 'j' to jump
+         to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	   * =>
+	   *   blez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	   * =>
+	   *   bltz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	   * =>
+	   *   bgez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	   * =>
+	   *   bgtz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
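+	  /* An explanatory note on why this works: after the addi,
+	     reg_C is zero exactly when reg_A equals const_int_B, and
+	     the unsigned comparison "reg_C <u 1" holds only for zero,
+	     so slti yields the desired 0/1 result.  */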
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
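+	  /* An explanatory note on why this works: "0 <u reg_C" holds
+	     exactly when reg_C is nonzero, i.e. when reg_A differs
+	     from const_int_B.  */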
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
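+	  /* An explanatory note on why this works: "reg_A >= B" is
+	     equivalent to "B - 1 < reg_A" as long as B - 1 does not
+	     wrap around, so comparing against const_int_B - 1 with
+	     slts/slt gives the desired 0/1 result.  */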
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* Perform the 'not' behavior.  */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
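+	  /* An explanatory note on why this works: "reg_A <= B" is
+	     equivalent to "reg_A < B + 1" as long as B + 1 does not
+	     wrap around, so comparing against const_int_B + 1 with
+	     slts/slt gives the desired 0/1 result.  */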
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* Perform the 'not' behavior.  */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For the 32-bit case,
+     we assume it is always reachable.
+     For the 16-bit case,
+     it must satisfy the { 255 >= (label - pc) >= -256 } condition.
+     However, since the nds32 $pc points at the beginning of the
+     instruction, we should leave some length space for the current insn.
+     So we use the range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jr5\t%0";
+  else
+    return "jr\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1 "general_operand" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jral5\t%0";
+  else
+    return "jral\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+        (if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The remaining three operands are the same as the
+;;   three operands of the 'call' instruction
+;;   (but with their numbers increased by one).
+
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2 "general_operand" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "register_operand" "r"))
+		         (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  if (TARGET_16_BIT)
+    return "jral5\t%1";
+  else
+    return "jral\t%1";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  /* Note that only under the V3/V3M ISA can we use the v3push prologue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push ();
+  else
+    nds32_expand_prologue ();
+  DONE;
+})
+
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  /* Note that only under the V3/V3M ISA can we use the v3pop epilogue.  */
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop ();
+  else
+    nds32_expand_epilogue ();
+  DONE;
+})
+
+
+;; nop instruction.
+
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+  "nop16"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; stack push/pop multiple
+
+(define_insn "*stack_push_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_PUSH_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn (),
+     we use a static character array to store the desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string representing the 'push.s' instruction.  */
+  const char *push_str = NULL;
+  /* A string representing the Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding of the instruction uses one bit per register,
+     in the following bitwise order: "fp gp lp sp".  */
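+  /* For example (illustrative): En4 = 10 = 0b1010 sets the fp and lp
+     bits, which selects the "{ $fp $lp }" entry below.  */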
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create push_str string.
+     Note that 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    push_str = "push.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output a ',' character if an En4 field exists.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			push_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return the formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+(define_insn "*stack_pop_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_POP_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn (),
+     we use a static character array to store the desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string representing the 'pop.s' instruction.  */
+  const char *pop_str = NULL;
+  /* A string representing the Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding of the instruction uses one bit per register,
+     in the following bitwise order: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create pop_str string.
+     Note that 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    pop_str = "pop.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output a ',' character if an En4 field exists.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			pop_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return the formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; stack v3push/v3pop
+
+(define_insn "*stack_v3push"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3PUSH)
+     ])]
+  ""
+{
+  return "v3push\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+(define_insn "*stack_v3pop"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3POP)
+     ])]
+  ""
+{
+  return "v3pop\t%0, %1";
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; In the nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish a function return
+;; from the general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+(define_insn "unspec_func_return"
+  [(set (pc)
+	(unspec:SI [(reg:SI LP_REGNUM)] UNSPEC_FUNC_RETURN))]
+  ""
+  "ret5"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create the following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; Steps C, D, E, and F are performed by the casesi_internal pattern.
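+;;
+;; For illustration only (a sketch): for a dense C switch such as
+;;     switch (i) { case 4: ...; case 5: ...; ...; case 9: ...; }
+;; step A computes k = i - 4, step B branches to the default label when
+;; k is (unsigned) greater than 5, and steps C to F index the table to
+;; reach the selected case label.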
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_addsi3 (reg, operands[0],
+			     GEN_INT (-INTVAL (operands[1]))));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Steps C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from the casesi pattern:
+;;
+;; operands[0]: The index from which the lower bound has been subtracted.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------