Patchwork [2/6] Andes nds32: machine description of nds32 porting (2).

login
register
mail settings
Submitter Chung-Ju Wu
Date July 24, 2013, 3:50 p.m.
Message ID <51EFF7CA.6050601@gmail.com>
Download mbox | patch
Permalink /patch/261440/
State New
Headers show

Comments

Chung-Ju Wu - July 24, 2013, 3:50 p.m.
Hi, Joseph,

Sorry for the late revised patch.
We have completed all of it based on your review comments.

On 7/10/13 7:27 AM, Joseph S. Myers wrote:
> On Mon, 8 Jul 2013, Chung-Ju Wu wrote:
>
>> +/* This is used to identify used ISA when doing code generation.
>> +   Initialize it with macro TARGET_DEFAULT_ISA,
>> +   which is defined in nds32-isa-xxx.h file.
>> +   User can specify it by using '-misa=X' option.  */
>> +enum nds32_isa nds32_selected_isa = TARGET_DEFAULT_ISA;
>
> Rather than using global state, put this in the gcc_options structure
> using a Variable entry in your .opt file.
>

Thanks for the suggestion.  We removed global state and
added handling in nds32.opt to achieve the same purpose of
setting instruction set architecture.

Also, after discussing with other toolchain engineers,
we replaced -misa option with -march option.

>> +	  warning (0, "For the option -misr-vector-size=X, the valid X "
>> +		      "must be: 4 or 16");
>
> The diagnostics in this function should all not start with a capital
> letter.  Invalid arguments to an option should be errors, not warnings.
> Since you have a location passed to this function, use error_at rather
> than the legacy functions that implicitly use input_location.
>

Fixed it accordingly.

>> +    case OPT_misa_:
>> +      /* Check valid ISA: v2 v3 v3m.  */
>> +      if (strcmp (arg, "v2") == 0)
>
> Use Enum in the .opt file and get all the conversion from strings to
> integer values, and errors for unknown values, done automatically.
>

Thanks for the suggestion.  You guided us to a better approach
that deals with it automatically.  We modified it accordingly.


Thanks for the review comments.
A revised patch is provided and here is a summary:

   1. Use error_at () for diagnostic statements and start them with lowercase.
   2. Some processing can be done in nds32.opt.
      Remove unnecessary parts from the nds32-common.c file.


gcc/
2013-07-24  Chung-Ju Wu  <jasonwucj@gmail.com>
	    Shiva Chen  <shiva0217@gmail.com>

	* config/nds32/nds32.md: New file.
	* common/config/nds32: New directory and files.


Best regards,
jasonwucj

Patch

diff --git gcc/common/config/nds32/nds32-common.c gcc/common/config/nds32/nds32-common.c
new file mode 100644
index 0000000..0b4321e
--- /dev/null
+++ gcc/common/config/nds32/nds32-common.c
@@ -0,0 +1,122 @@ 
+/* Common hooks of Andes NDS32 cpu for GNU compiler
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   Contributed by Andes Technology Corporation.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "diagnostic-core.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_HANDLE_OPTION.  */
+static bool
+nds32_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
+		     struct gcc_options *opts_set ATTRIBUTE_UNUSED,
+		     const struct cl_decoded_option *decoded,
+		     location_t loc)
+{
+  size_t     code  = decoded->opt_index;
+  int        value = decoded->value;
+
+  switch (code)
+    {
+    case OPT_misr_vector_size_:
+      /* Check the valid vector size: 4 or 16.  */
+      if (value != 4 && value != 16)
+	{
+	  error_at (loc, "for the option -misr-vector-size=X, the valid X "
+			 "must be: 4 or 16");
+	  return false;
+	}
+
+      return true;
+
+    case OPT_mcache_block_size_:
+      /* Check valid value: 4 8 16 32 64 128 256 512.  */
+      if (exact_log2 (value) < 2 || exact_log2 (value) > 9)
+	{
+	  error_at (loc, "for the option -mcache-block-size=X, the valid X "
+			 "must be: 4, 8, 16, 32, 64, 128, 256, or 512");
+	  return false;
+	}
+
+      return true;
+
+    default:
+      return true;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
+static const struct default_options nds32_option_optimization_table[] =
+{
+  /* Enable -fomit-frame-pointer by default at -O1 or higher.  */
+  { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+  /* Enable -mv3push by default at -Os, but it is useless under V2 ISA.  */
+  { OPT_LEVELS_SIZE,   OPT_mv3push,             NULL, 1 },
+
+  { OPT_LEVELS_NONE,   0,                       NULL, 0 }
+};
+
+/* ------------------------------------------------------------------------ */
+
+/* ======================================================================== */
+/* Run-time Target Specification.  */
+/* ======================================================================== */
+
+/* Default enable
+     TARGET_GP_DIRECT: Generate gp-imply instruction.
+     TARGET_16_BIT   : Generate 16/32 bit mixed length instruction.
+     TARGET_PERF_EXT : Generate performance extension instruction.
+     TARGET_CMOV     : Generate conditional move instruction.  */
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS		\
+  (MASK_GP_DIRECT				\
+   | MASK_16_BIT				\
+   | MASK_PERF_EXT				\
+   | MASK_CMOV)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION nds32_handle_option
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE nds32_option_optimization_table
+
+/* ======================================================================== */
+/* Defining the Output Assembler Language.  */
+/* ======================================================================== */
+
+/* Assembler Commands for Exception Regions.  */
+
+#undef TARGET_EXCEPT_UNWIND_INFO
+#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
+
+/* ------------------------------------------------------------------------ */
+
+struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
+
+/* ------------------------------------------------------------------------ */
diff --git gcc/config/nds32/nds32.md gcc/config/nds32/nds32.md
new file mode 100644
index 0000000..66f4e00
--- /dev/null
+++ gcc/config/nds32/nds32.md
@@ -0,0 +1,2835 @@ 
+;; Machine description of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; See file "rtl.def" for documentation on define_insn, match_*, et al.
+
+;; Include predicates definition.
+(include "predicates.md")
+
+;; Include constraints definition.
+(include "constraints.md")
+
+;; Include iterators definition.
+(include "iterators.md")
+
+;; Include pipelines definition.
+(include "pipelines.md")
+
+
+;; Include constants definition.
+(include "constants.md")
+
+
+;; Include intrinsic functions definition.
+(include "nds32.intrinsic.md")
+
+;; Include block move for nds32 multiple load/store behavior.
+(include "nds32.multiple.md")
+
+;; Include DImode/DFmode operations.
+(include "nds32.doubleword.md")
+
+;; Include peephole patterns.
+(include "nds32.peephole2.md")
+
+
+;; Insn type, it is used to default other attribute values.
+(define_attr "type"
+  "unknown,move,load,store,alu,compare,branch,call,misc"
+  (const_string "unknown"))
+
+
+;; Length, in bytes, default is 4-bytes.
+(define_attr "length" "" (const_int 4))
+
+
+;; Enabled, which is used to enable/disable insn alternatives.
+;; Note that we use length and TARGET_16_BIT here as criteria.
+;; If the instruction pattern already check TARGET_16_BIT to
+;; determine the length by itself, its enabled attribute should be
+;; always 1 to avoid the conflict with the settings here.
+(define_attr "enabled" ""
+  (cond [(and (eq_attr "length" "2")
+	      (match_test "!TARGET_16_BIT"))
+	 (const_int 0)]
+	(const_int 1)))
+
+
+;; ----------------------------------------------------------------------------
+
+
+;; Move instructions.
+
+(define_expand "movqi"
+  [(set (match_operand:QI 0 "general_operand" "")
+	(match_operand:QI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_expand "movhi"
+  [(set (match_operand:HI 0 "general_operand" "")
+	(match_operand:HI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_expand "movsi"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(match_operand:SI 1 "general_operand" ""))]
+  ""
+{
+  /* Need to force register if mem <- !reg.  */
+  if (GET_CODE (operands[0]) == MEM && !REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+})
+
+(define_insn "*store_si"
+  [(set (match_operand:SI 1 "memory_operand"   "=U45, U33, U37, U45, m")
+	(match_operand:SI 0 "register_operand" "   l,   l,   l,   d, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_store (operands, 4);
+
+    default:
+      return nds32_output_32bit_store (operands, 4);
+    }
+}
+  [(set_attr "type"   "store,store,store,store,store")
+   (set_attr "length" "    2,    2,    2,    2,    4")])
+
+(define_insn "*store_<mode>"
+  [(set (match_operand:QIHI 1 "memory_operand"   "=U33, m")
+	(match_operand:QIHI 0 "register_operand" "   l, r"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_store (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_store (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "store,store")
+   (set_attr "length" "    2,    4")])
+
+(define_insn "*load_si"
+  [(set (match_operand:SI 1 "register_operand" "=  l,   l,   l,   d, r")
+	(match_operand:SI 0 "memory_operand"   " U45, U33, U37, U45, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+      return nds32_output_16bit_load (operands, 4);
+
+    default:
+      return nds32_output_32bit_load (operands, 4);
+    }
+}
+  [(set_attr "type"   "load,load,load,load,load")
+   (set_attr "length" "   2,   2,   2,   2,   4")])
+
+(define_insn "*load_<mode>"
+  [(set (match_operand:QIHI 1 "register_operand" "=  l, r")
+	(match_operand:QIHI 0 "memory_operand"   " U33, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return nds32_output_16bit_load (operands, <byte>);
+
+    default:
+      return nds32_output_32bit_load (operands, <byte>);
+    }
+}
+  [(set_attr "type"   "load,load")
+   (set_attr "length" "   2,   4")])
+
+(define_insn "*mov<mode>"
+  [(set (match_operand:QIHISI 1 "register_operand" "=r, m, r")
+	(match_operand:QIHISI 0 "register_operand" " r, r, m"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "mov55\t%1, %0";
+      else
+	return "ori\t%1, %0, 0";
+    case 1:
+      return nds32_output_32bit_store (operands, <byte>);
+    case 2:
+      return nds32_output_32bit_load (operands, <byte>);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "alu,store,load")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (const_int 4)
+       ;; Alternative 2
+       (const_int 4)
+     ])])
+
+;; For QImode and HImode, the immediate value can be fit in imm20s.
+;; So there is no need to support QI and HI in the split patterns.
+;; Also, we use const_int_operand to limit that only CONST_INT
+;; is able to match such instruction template.
+;;
+;; Besides, in the split condition, we ask the big-constant split to be
+;; performed after the reload phase, so that the mov2add optimization
+;; in postreload has a chance to optimize the code.
+
+(define_insn_and_split "*movsi_const"
+  [(set (match_operand:QIHISI 0 "register_operand"  "=   d,    r,    r,    r,    t,    r")
+	(match_operand:QIHISI 1 "const_int_operand" " Ip05, Is05, Is20, Ihig, Ispl, Ispl"))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      if (get_attr_length (insn) == 2)
+	return "movpi45\t%0, %1";
+      /* else fall through.  */
+    case 1:
+      if (get_attr_length (insn) == 2)
+	return "movi55\t%0, %1";
+      /* else fall through.  */
+    case 2:
+      return "movi\t%0, %1";
+    case 3:
+      return "sethi\t%0, hi20(%1)";
+    case 4:
+      /* Use $r15, if the value is NOT in the range of Is20,
+         we must output "sethi + ori" directly since
+         we may already passed the split stage.  */
+      return "sethi\t%0, hi20(%1)\;ori\t%0, %0, lo12(%1)";
+
+    default:
+      return "#";
+    }
+}
+   "reload_completed
+    && satisfies_constraint_Ispl (operands[1])
+    && !satisfies_constraint_Is20 (operands[1])"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0) (plus:QIHISI (match_dup 0) (match_dup 2)))]
+{
+  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xfff);
+  operands[1] = GEN_INT ((INTVAL (operands[1]) >> 12) << 12);
+}
+  [(set_attr "type" "alu,alu,alu,alu,alu,alu")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 2
+       (const_int 4)
+       ;; Alternative 3
+       (const_int 4)
+       ;; Alternative 4
+       (const_int 8)
+       ;; Alternative 5
+       (const_int 8)
+     ])])
+
+;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
+;; are able to match such instruction template.
+(define_insn "*move_addr"
+  [(set (match_operand:SI 0 "register_operand"       "=l, r")
+	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+  ""
+  "la\t%0, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "8")])
+
+
+(define_insn "*sethi"
+  [(set (match_operand:SI 0 "register_operand"           "=r")
+	(high:SI (match_operand:SI 1 "immediate_operand" " i")))]
+  ""
+{
+  return "sethi\t%0, hi20(%1)";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+(define_insn "*lo_sum"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(lo_sum:SI (match_operand:SI 1 "register_operand"  " 0")
+		   (match_operand:SI 2 "immediate_operand" " i")))]
+  ""
+  "ori\t%0, %1, lo12(%2)"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Zero extension instructions.
+
+(define_expand "zero_extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(zero_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (zero_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (zero_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*zero_extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_zero_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+(define_insn "*zero_extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(zero_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "ze<size>33\t%0, %1";
+    case 1:
+      return "ze<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "*zero_extend<mode>si2_load"
+  [(set (match_operand:SI 1 "register_operand"                 "=  l, *r")
+	(zero_extend:SI (match_operand:QIHI 0 "memory_operand" " U33,  m")))]
+  ""
+{
+  if (which_alternative == 0)
+    return nds32_output_16bit_load (operands, <byte>);
+  else
+    return nds32_output_32bit_load (operands, <byte>);
+}
+  [(set_attr "length" "2, 4")
+   (set_attr "type" "load,load")])
+
+;; Sign extension instructions.
+
+(define_expand "extend<mode>si2"
+  [(set (match_operand:SI 0 "general_operand" "")
+	(sign_extend:SI (match_operand:QIHI 1 "general_operand" "")))]
+  ""
+{
+  rtx tmp_reg;
+
+  /* We need to make sure operands[1] is a register.  */
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+
+  /* If the pattern is "(mem X) <- (sign_extend (reg Y))",
+     we create two rtx patterns:
+       (reg:SI K) <- (sign_extend:SI (reg Y))
+       (mem:SI X) <- (reg:SI K)
+     The first rtx will be matched by '*extend<mode>si2_reg' template,
+     and the second rtx will be matched by mov naming pattern.  */
+  if (MEM_P (operands[0]))
+    {
+      tmp_reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_extend<mode>si2 (tmp_reg, operands[1]));
+      emit_insn (gen_movsi (operands[0], tmp_reg));
+
+      DONE;
+    }
+})
+
+(define_insn "*extend<mode>si2_reg"
+  [(set (match_operand:SI 0 "register_operand"                   "=w, r")
+	(sign_extend:SI (match_operand:QIHI 1 "register_operand" " w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "se<size>33\t%0, %1";
+    case 1:
+      return "se<size>\t%0, %1";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "*extend<mode>si2_load"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(sign_extend:SI (match_operand:QIHI 1 "memory_operand" " m")))]
+  ""
+{
+  rtx mem_addr_op;
+  rtx op0;
+  rtx op1;
+
+  /* Retrieve rtx X from (mem (X ...)).  */
+  mem_addr_op = XEXP (operands[1], 0);
+
+  switch (GET_CODE (mem_addr_op))
+    {
+    case SYMBOL_REF:
+    case CONST:
+      /* (mem (symbol_ref X))
+         (mem (const (...)))
+         => access global variables,
+            use "lbsi.gp / lhsi.gp" */
+      return "l<size>si.gp\t%0, %1";
+
+    case REG:
+      /* (mem (reg X))
+         => access location by using register,
+            use "lbsi / lhsi" */
+      return "l<size>si\t%0, %1";
+
+    case PLUS:
+      /* get operands first */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (plus reg reg))
+         => access location by adding two registers,
+            use "lbs / lhs" */
+      if (REG_P (op0) && REG_P (op1))
+	return "l<size>s\t%0, %1";
+
+      /* (mem (plus reg const_int))
+         => access location by adding one register with const_int,
+            use "lbsi / lhsi" */
+      if (REG_P (op0) && CONST_INT_P (op1))
+	return "l<size>si\t%0, %1";
+
+      /* (mem (plus (mult reg const_int) reg))
+         => access location by adding one register with
+            multiplication of register and const_int,
+            use "lbs / lhs" */
+      if (GET_CODE (op0) == MULT && REG_P (op1)
+	  &&       REG_P (XEXP (op0, 0))
+	  && CONST_INT_P (XEXP (op0, 1)))
+	return "l<size>s\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_MODIFY:
+      /* Get operands first.  */
+      op0 = XEXP (mem_addr_op, 0);
+      op1 = XEXP (mem_addr_op, 1);
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (reg))))
+         => access location by using register which will be
+            post modified with reg,
+            use "lbs.bi/ lhs.bi / lws.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && REG_P (XEXP (op1, 1)))
+	return "l<size>s.bi\t%0, %1";
+
+      /* (mem (post_modify (reg)
+                           (plus (reg) (const_int))))
+         => access location by using register which will be
+            post modified with const_int,
+            use "lbsi.bi/ lhsi.bi / lwsi.bi" */
+      if (REG_P (op0) && GET_CODE (op1) == PLUS
+	  && CONST_INT_P (XEXP (op1, 1)))
+	return "l<size>si.bi\t%0, %1";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_INC:
+      /* (mem (post_inc reg))
+         => access location by using register which will be
+            post increment,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, <byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case POST_DEC:
+      /* (mem (post_dec reg))
+         => access location by using register which will be
+            post decrement,
+            use "lbsi.bi / lhsi.bi" */
+      if (REG_P (XEXP (mem_addr_op, 0)))
+	return "l<size>si.bi\t%0, %1, -<byte>";
+
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+
+    case LO_SUM:
+      operands[2] = XEXP (mem_addr_op, 1);
+      operands[1] = XEXP (mem_addr_op, 0);
+      return "l<size>si\t%0, [%1 + lo12(%2)]";
+
+    default:
+      /* Any other cases, stop and report problem.  */
+      goto other_cases;
+    }
+
+other_cases:
+  gcc_unreachable ();
+}
+  [(set_attr "type" "load")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Arithmetic instructions.
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(plus:SI (match_operand:SI 1 "register_operand" "")
+		 (match_operand:SI 2 "nds32_nonmemory_nonsymbol_operand" "")))]
+  ""
+{
+  if (GET_CODE (operands[2]) == CONST_INT)
+    operands[2] = gen_int_mode (INTVAL (operands[2]), SImode);
+})
+
+(define_insn "*add<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                      "=   d,    l,  d, l,    k,    l,    r, r")
+	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"         "    0,    l, %0, l,    0,    k,    r, r")
+		     (match_operand:QIHISI 2 "nds32_reg_or_int_operand" " Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+  ""
+  "@
+  addi45\t%0, %2
+  addi333\t%0, %1, %2
+  add45\t%0, %2
+  add333\t%0, %1, %2
+  addi10.sp\t%2
+  addri36.sp\t%0, %2
+  addi\t%0, %1, %2
+  add\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  2,  2,  4,  4")])
+
+(define_expand "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" "")
+		 (match_operand:SI 2 "nds32_rimm15s_operand" "")))]
+  ""
+  ""
+)
+
+(define_insn "*sub<mode>3"
+  [(set (match_operand:QIHISI 0 "register_operand"                    "=   d,    l, d, l,    r, r")
+	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" "    0,    l, 0, l, Is15, r")
+		      (match_operand:QIHISI 2 "nds32_rimm15s_operand" " Iu05, Iu03, r, l,    r, r")))]
+  ""
+  "@
+  subi45\t%0, %2
+  subi333\t%0, %1, %2
+  sub45\t%0, %2
+  sub333\t%0, %1, %2
+  subri\t%0, %2, %1
+  sub\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  2,  2,  2,  4,  4")])
+
+
+;; GCC intends to simplify (plus (ashift ...) (reg))
+;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*add_slli"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
+			  (match_operand:SI 2 "immediate_operand" " i"))
+		 (match_operand:SI 3 "register_operand"           " r")))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[2])) != -1)
+   && (exact_log2 (INTVAL (operands[2])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'add_slli' instruction.  */
+  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));
+
+  return "add_slli\t%0, %3, %1, %2";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*add_srli"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			      (match_operand:SI 2 "immediate_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "add_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; GCC intends to simplify (minus (reg) (ashift ...))
+;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
+;; and needs to ensure it is exact_log2 value.
+(define_insn "*sub_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=r")
+	(minus:SI (match_operand:SI 1 "register_operand"           " r")
+		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
+			   (match_operand:SI 3 "immediate_operand" " i"))))]
+  "TARGET_ISA_V3
+   && (exact_log2 (INTVAL (operands[3])) != -1)
+   && (exact_log2 (INTVAL (operands[3])) <= 31)"
+{
+  /* Get floor_log2 of the immediate value
+     so that we can generate 'sub_slli' instruction.  */
+  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));
+
+  return "sub_slli\t%0, %1, %2, %3";
+}
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+(define_insn "*sub_srli"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
+			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
+  "TARGET_ISA_V3"
+  "sub_srli\t%0, %1, %2, %3"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; Multiplication instructions.
+
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand"          "= w, r")
+	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
+		 (match_operand:SI 2 "register_operand" "  w, r")))]
+  ""
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mul33\t%0, %2";
+    case 1:
+      return "mul\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V3"
+  "mulsr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand"                          "=r")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
+		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
+  "TARGET_ISA_V3"
+  "mulr64\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Multiply-accumulate instructions.
+
+(define_insn "*maddr32_0"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (match_operand:SI 3 "register_operand"          " 0")
+                 (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*maddr32_1"
+  [(set (match_operand:SI 0 "register_operand"                   "=r")
+        (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r")
+                          (match_operand:SI 2 "register_operand" " r"))
+                 (match_operand:SI 3 "register_operand"          " 0")))]
+  ""
+  "maddr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "*msubr32"
+  [(set (match_operand:SI 0 "register_operand"                    "=r")
+        (minus:SI (match_operand:SI 3 "register_operand"          " 0")
+                  (mult:SI (match_operand:SI 1 "register_operand" " r")
+                           (match_operand:SI 2 "register_operand" " r"))))]
+  ""
+  "msubr32\t%0, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; Div Instructions.
+
+(define_insn "divmodsi4"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+        (div:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand" " r")))
+   (set (match_operand:SI 3 "register_operand"         "=r")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divsr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+(define_insn "udivmodsi4"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+        (udiv:SI (match_operand:SI 1 "register_operand" " r")
+                (match_operand:SI 2 "register_operand"  " r")))
+   (set (match_operand:SI 3 "register_operand"          "=r")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "divr\t%0, %3, %1, %2"
+  [(set_attr "type"   "alu")
+   (set_attr "length"   "4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Boolean instructions.
+;; Note: We define the DImode versions in nds32.doubleword.md.
+
+;; ----------------------------------------------------------------------------
+;; 'AND' operation
+;; ----------------------------------------------------------------------------
+
+(define_insn "bitc"
+  [(set (match_operand:SI 0 "register_operand"                 "=r")
+	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
+		(match_operand:SI 2 "register_operand"         " r")))]
+  "TARGET_ISA_V3"
+  "bitc\t%0, %2, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")]
+)
+
+;; Expand SImode AND.  A const_int mask of 0xff or 0xffff is rewritten
+;; as a zero-extension so the shorter zeb/zeh instruction forms apply;
+;; all other cases fall through to the anonymous *andsi3 pattern.
+(define_expand "andsi3"
+  [(set (match_operand:SI         0 "register_operand" "")
+	(and:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+{
+  /* If operands[2] is const_int,
+     we might be able to use other more efficient instructions.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      int mask = INTVAL (operands[2]);
+
+      if (mask == 255)
+	{
+	  /* ($r0 & 0xff)  ==>  (zeb $r0, $r0) */
+	  operands[1] = convert_to_mode (QImode, operands[1], 1);
+	  emit_insn (gen_zero_extendqisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+      else if (mask == 65535)
+	{
+	  /* ($r0 & 0xffff)  ==>  (zeh $r0, $r0) */
+	  operands[1] = convert_to_mode (HImode, operands[1], 1);
+	  emit_insn (gen_zero_extendhisi2 (operands[0], operands[1]));
+	  DONE;
+	}
+    }
+})
+
+;; SImode AND with ten alternatives, covering 16-bit forms (and33 and
+;; the special-mask zeb33/xlsb33/x11b33/bmski33/fexti33 encodings) and
+;; 32-bit forms (and/andi/bitci/bclr).
+(define_insn "*andsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    l,    l,    l,    l,    l,    r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand" " %0, r,    l,    l,    l,    0,    0,    r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Izeb, Ixls, Ix11, Ibms, Ifex, Iu15, Ii15, Ic15")))]
+  ""
+{
+  HOST_WIDE_INT mask;
+  int zero_position;
+
+  /* 16-bit andi instructions:
+     andi Rt3,Ra3,0xff  -> zeb33  Rt3,Ra3
+     andi Rt3,Ra3,0x01  -> xlsb33 Rt3,Ra3
+     andi Rt3,Ra3,0x7ff -> x11b33 Rt3,Ra3
+     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
+     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "and33\t%0, %2";
+    case 1:
+      return "and\t%0, %1, %2";
+    case 2:
+      return "zeb33\t%0, %1";
+    case 3:
+      return "xlsb33\t%0, %1";
+    case 4:
+      return "x11b33\t%0, %1";
+    case 5:
+      /* Read INTVAL only in the const_int alternatives: for
+         alternatives 0 and 1 operands[2] is a register rtx and
+         applying INTVAL to it is invalid (aborts with RTL checking).  */
+      mask = INTVAL (operands[2]);
+      operands[2] = GEN_INT (floor_log2 (mask));
+      return "bmski33\t%0, %2";
+    case 6:
+      mask = INTVAL (operands[2]);
+      operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
+      return "fexti33\t%0, %2";
+    case 7:
+      return "andi\t%0, %1, %2";
+    case 8:
+      mask = INTVAL (operands[2]);
+      operands[2] = GEN_INT (~mask);
+      return "bitci\t%0, %1, %2";
+    case 9:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bclr_p() test,
+         so that we can guarantee there is only one 0-bit
+         within the immediate value.  */
+      for (zero_position = 31; zero_position >= 0; zero_position--)
+	{
+	  /* Build the probe bit in HOST_WIDE_INT: '1 << 31' would be
+	     signed-int overflow (undefined behavior) on hosts where
+	     int is 32 bits.  */
+	  if ((INTVAL (operands[2])
+	       & ((HOST_WIDE_INT) 1 << zero_position)) == 0)
+	    {
+	      /* Found the 0-bit position.  */
+	      operands[2] = GEN_INT (zero_position);
+	      break;
+	    }
+	}
+      return "bclr\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  4,  4,  4")])
+
+;; Fused shift-left-then-AND (V3 only): %0 = (%1 << %2) & %3.
+(define_insn "*and_slli"
+  [(set (match_operand:SI 0 "register_operand"                      "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
+			    (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"              "    r")))]
+  "TARGET_ISA_V3"
+  "and_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Fused logical-shift-right-then-AND (V3 only): %0 = (%1 >> %2) & %3.
+(define_insn "*and_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "and_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'OR' operation
+;; ----------------------------------------------------------------------------
+
+;; For iorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+;; No operand adjustment is needed at expand time; the anonymous
+;; *iorsi3 pattern below selects the actual instruction form.
+(define_expand "iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(ior:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; This is the iorsi3 pattern for V3/V3M ISA,
+;; which DOES HAVE 'or33' instruction.
+;; So we can identify 'or Rt3,Ra3,Rb3' case and set its length to be 2.
+;; SImode OR: 16-bit or33, 32-bit or/ori, and single-bit-set bset.
+(define_insn "*iorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, Ie15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "or33\t%0, %2";
+    case 1:
+      return "or\t%0, %1, %2";
+    case 2:
+      return "ori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_bset_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  /* Build the probe bit in HOST_WIDE_INT: '1 << 31' would be
+	     signed-int overflow (undefined behavior) on hosts where
+	     int is 32 bits.  */
+	  if ((INTVAL (operands[2])
+	       & ((HOST_WIDE_INT) 1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "bset\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; Fused shift-left-then-OR (V3 only): %0 = (%1 << %2) | %3.
+(define_insn "*or_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "or_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Fused logical-shift-right-then-OR (V3 only): %0 = (%1 >> %2) | %3.
+(define_insn "*or_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "or_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'XOR' operation
+;; ----------------------------------------------------------------------------
+
+;; For xorsi3 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; simply set different instruction length according to ISA.
+;; No operand adjustment is needed at expand time; the anonymous
+;; *xorsi3 pattern below selects the actual instruction form.
+(define_expand "xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(xor:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
+  ""
+  ""
+)
+
+;; SImode XOR: 16-bit xor33, 32-bit xor/xori, and single-bit-toggle btgl.
+(define_insn "*xorsi3"
+  [(set (match_operand:SI 0 "register_operand"         "= w, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand" " %0, r,    r,    r")
+		(match_operand:SI 2 "general_operand"  "  w, r, Iu15, It15")))]
+  ""
+{
+  int one_position;
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "xor33\t%0, %2";
+    case 1:
+      return "xor\t%0, %1, %2";
+    case 2:
+      return "xori\t%0, %1, %2";
+    case 3:
+      /* If we reach this alternative,
+         it must pass the nds32_can_use_btgl_p() test,
+         so that we can guarantee there is only one 1-bit
+         within the immediate value.  */
+      for (one_position = 31; one_position >= 0; one_position--)
+	{
+	  /* Build the probe bit in HOST_WIDE_INT: '1 << 31' would be
+	     signed-int overflow (undefined behavior) on hosts where
+	     int is 32 bits.  */
+	  if ((INTVAL (operands[2])
+	       & ((HOST_WIDE_INT) 1 << one_position)) != 0)
+	    {
+	      /* Found the 1-bit position.  */
+	      operands[2] = GEN_INT (one_position);
+	      break;
+	    }
+	}
+      return "btgl\t%0, %1, %2";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "alu,alu,alu,alu")
+   (set_attr "length" "  2,  4,  4,  4")])
+
+;; Fused shift-left-then-XOR (V3 only): %0 = (%1 << %2) ^ %3.
+(define_insn "*xor_slli"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
+			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"             "    r")))]
+  "TARGET_ISA_V3"
+  "xor_slli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Fused logical-shift-right-then-XOR (V3 only): %0 = (%1 >> %2) ^ %3.
+(define_insn "*xor_srli"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
+			     (match_operand:SI 2 "immediate_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3"
+  "xor_srli\t%0, %3, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Rotate Right Instructions.
+
+;; Rotate right: 'rotri' for a 5-bit unsigned immediate amount,
+;; 'rotr' for a register amount.
+(define_insn "rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
+		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+  ""
+  "@
+  rotri\t%0, %1, %2
+  rotr\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'NEG' operation
+;; ----------------------------------------------------------------------------
+
+;; For negsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; No operand adjustment is needed at expand time; the anonymous
+;; *negsi2 pattern below selects the actual instruction form.
+(define_expand "negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(neg:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; Note that there is NO 'neg33' instruction for V2 ISA.
+;; So 'subri A,B,0' (its semantic is 'A = 0 - B')
+;; is the only option for V2 ISA.
+;; Negation: 16-bit 'neg33' when both operands fit the low registers,
+;; otherwise 32-bit 'subri %0, %1, 0' (i.e. %0 = 0 - %1).
+(define_insn "*negsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   neg33\t%0, %1
+   subri\t%0, %1, 0"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+
+;; ----------------------------------------------------------------------------
+;; 'ONE_COMPLEMENT' (bitwise NOT) operation
+;; ----------------------------------------------------------------------------
+
+;; For one_cmplsi2 naming pattern, we have to use define_expand first,
+;; and then design different anonymous patterns so that it can
+;; output assembly code according to ISA.
+;; No operand adjustment is needed at expand time; the anonymous
+;; *one_cmplsi2 pattern below selects the actual instruction form.
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(not:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  ""
+)
+
+;; This is the one_cmplsi2 pattern
+;; Bitwise NOT: 16-bit 'not33' when both operands fit the low registers,
+;; otherwise synthesized as 'nor %0, %1, %1'.
+(define_insn "*one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=w, r")
+	(not:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  ""
+  "@
+   not33\t%0, %1
+   nor\t%0, %1, %1"
+  [(set_attr "type"   "alu,alu")
+   (set_attr "length" "  2,  4")])
+
+;; ----------------------------------------------------------------------------
+
+;; Shift instructions.
+
+;; Shift left logical: 16-bit slli333 (3-bit immediate, low registers),
+;; 32-bit slli (5-bit immediate), or sll (register amount).
+;; The alternatives map one-to-one onto output templates, so use the
+;; '@' multi-alternative form instead of C code.
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand"            "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand" "    l,    r, r")
+		   (match_operand:SI 2 "general_operand"  " Iu03, Iu05, r")))]
+  ""
+  "@
+   slli333\t%0, %1, %2
+   slli\t%0, %1, %2
+   sll\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Shift right arithmetic: 16-bit srai45 (destructive, middle registers),
+;; 32-bit srai (5-bit immediate), or sra (register amount).
+;; The alternatives map one-to-one onto output templates, so use the
+;; '@' multi-alternative form instead of C code.
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+  "@
+   srai45\t%0, %2
+   srai\t%0, %1, %2
+   sra\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+;; Shift right logical: 16-bit srli45 (destructive, middle registers),
+;; 32-bit srli (5-bit immediate), or srl (register amount).
+;; The alternatives map one-to-one onto output templates, so use the
+;; '@' multi-alternative form instead of C code.
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand"              "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand" "    0,    r, r")
+		     (match_operand:SI 2 "general_operand"  " Iu05, Iu05, r")))]
+  ""
+  "@
+   srli45\t%0, %2
+   srli\t%0, %1, %2
+   srl\t%0, %1, %2"
+  [(set_attr "type"   "alu,alu,alu")
+   (set_attr "length" "  2,  4,  4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; ----------------------------------------------------------------------------
+;; Conditional Move patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional move.  Any comparison is lowered to an
+;; slt/slts result tested against zero, so that the cmovz/cmovn insns
+;; below can match.
+(define_expand "movsicc"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(if_then_else:SI (match_operand 1 "comparison_operator" "")
+			 (match_operand:SI 2 "register_operand" "")
+			 (match_operand:SI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+      && GET_MODE (XEXP (operands[1], 0)) == SImode
+      && XEXP (operands[1], 1) == const0_rtx)
+    {
+      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
+         we have gcc generate original template rtx.  */
+      goto create_template;
+    }
+  else
+    {
+      /* Since there is only 'slt'(Set when Less Than) instruction for
+         comparison in Andes ISA, the major strategy we use here is to
+         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
+         We design constraints properly so that the reload phase will assist
+         to make one source operand to use same register as result operand.
+         Then we can use cmovz/cmovn to catch the other source operand
+         which has different register.  */
+      enum rtx_code code = GET_CODE (operands[1]);
+      enum rtx_code new_code = code;
+      rtx cmp_op0 = XEXP (operands[1], 0);
+      rtx cmp_op1 = XEXP (operands[1], 1);
+      rtx tmp;
+      int reverse = 0;
+
+      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
+         Strategy : Reverse condition and swap comparison operands
+
+         For example:
+
+             a <= b ? P : Q   (LE or LEU)
+         --> a >  b ? Q : P   (reverse condition)
+         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
+
+             a >= b ? P : Q   (GE or GEU)
+         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
+
+             a <  b ? P : Q   (LT or LTU)
+         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
+
+             a >  b ? P : Q   (GT or GTU)
+         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
+      switch (code)
+	{
+	case NE:
+	  /*   (a != b ? P : Q)
+	     can be expressed as
+	       (a == b ? Q : P)
+	     so, fall through to reverse condition */
+	case GE: case GEU: case LE: case LEU:
+	  new_code = reverse_condition (code);
+	  reverse = 1;
+	  break;
+	case EQ: case GT: case GTU: case LT: case LTU:
+	  /* no need to reverse condition */
+	  break;
+	default:
+	  FAIL;
+	}
+
+      /* For '>' comparison operator, we swap operands
+         so that we can have 'LT/LTU' operator.  */
+      if (new_code == GT || new_code == GTU)
+	{
+	  tmp     = cmp_op0;
+	  cmp_op0 = cmp_op1;
+	  cmp_op1 = tmp;
+
+	  new_code = swap_condition (new_code);
+	}
+
+      /* Use a temporary register to store slt/slts result.  */
+      tmp = gen_reg_rtx (SImode);
+
+      /* Split EQ and NE because we don't have a direct comparison of EQ and NE.
+         If we don't do this, the conditional move transformation will fail
+	 when produce (SET A (EQ B C)) or (SET A (NE B C)).  */
+      if (new_code == EQ)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
+	}
+      else if (new_code == NE)
+	{
+	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
+	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
+        }
+      else
+	/* This emit_insn will create corresponding 'slt/slts' instruction.  */
+	emit_insn (gen_rtx_SET (VOIDmode, tmp,
+				gen_rtx_fmt_ee (new_code, SImode,
+						cmp_op0, cmp_op1)));
+
+      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
+         so that cmovz or cmovn will be matched later.
+
+         For reverse condition cases, we want to create a semantic that:
+           (eq X 0) --> pick up "else" part
+         For normal cases, we want to create a semantic that:
+           (ne X 0) --> pick up "then" part
+
+         The reason we do not physically change their rtx position is that
+         gcc will also do optimization by reverse condition,
+         which may break up our transformation semantic
+         if we physically change rtx right now.
+         So we just pick up the corresponding comparison operator
+         based on the reverse status, leaving the "swap position" job
+         after reload phase by using define_insn_and_split strategy.  */
+      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
+				    VOIDmode, tmp, const0_rtx);
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+;; QImode/HImode conditional move: lower the operands to SImode subregs
+;; and reuse the SImode movsicc expansion.
+(define_expand "mov<mode>cc"
+  [(set (match_operand:QIHI 0 "register_operand" "")
+	(if_then_else:QIHI (match_operand 1 "comparison_operator" "")
+			   (match_operand:QIHI 2 "register_operand" "")
+			   (match_operand:QIHI 3 "register_operand" "")))]
+  "TARGET_CMOV"
+{
+  rtx insn;
+
+  /* For QImode and HImode conditional move,
+     make them to be SImode behavior.  */
+  operands[0] = simplify_gen_subreg (SImode, operands[0], <MODE>mode, 0);
+  operands[2] = simplify_gen_subreg (SImode, operands[2], <MODE>mode, 0);
+  operands[3] = simplify_gen_subreg (SImode, operands[3], <MODE>mode, 0);
+
+  insn = gen_movsicc (operands[0], operands[1], operands[2], operands[3]);
+
+  /* gen_movsicc may FAIL internally and return NULL.  */
+  if (!insn)
+    FAIL;
+
+  emit_insn (insn);
+  DONE;
+})
+
+;; Conditional move when the test register %1 is zero; the alternative
+;; whose source already shares the destination register is a no-op and
+;; only the other source needs to be moved.
+(define_insn "cmovz"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovz\t%0, %2, %1
+   cmovz\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; Conditional move when the test register %1 is non-zero; mirror image
+;; of the cmovz pattern above.
+(define_insn "cmovn"
+  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand"     " r, 0")
+			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+  "TARGET_CMOV"
+  "@
+   cmovn\t%0, %2, %1
+   cmovn\t%0, %3, %1"
+  [(set_attr "type" "move")
+   (set_attr "length"  "4")])
+
+;; After reload, split a generic equality-tested conditional move into
+;; a concrete cmovz or cmovn, depending on which source operand reload
+;; tied to the destination register.
+(define_insn_and_split "*movsicc"
+  [(set (match_operand:SI 0 "register_operand"                     "=r, r")
+	(if_then_else:SI (match_operator 1 "nds32_equality_comparison_operator"
+			   [(match_operand:SI 2 "register_operand" " r, r")
+			    (const_int 0)])
+			 (match_operand:SI 3 "register_operand"    " 0, r")
+			 (match_operand:SI 4 "register_operand"    " r, 0")))]
+  "TARGET_CMOV"
+  "#"
+  "reload_completed"
+  [(pc)]
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx then_op = operands[3];
+  rtx else_op = operands[4];
+  rtx tmp;
+
+  /* According to the implementation in "movsicc" naming pattern,
+     if we make transformation in which the comparison code is EQ,
+     the desired target is at "else" part position semantically.
+     Now it is the time (after reload_completed) to physically
+     swap it to "then" part position.  */
+  if (code == EQ)
+    {
+      tmp     = then_op;
+      then_op = else_op;
+      else_op = tmp;
+    }
+
+  /* Choosing cmovz or cmovn is based on reload phase result.
+     After reload phase, one source operand will use
+     the same register as result operand.
+     We can use cmovz/cmovn to catch the other source operand
+     which has different register.
+     So we check register number to determine using cmovz or cmovn.  */
+  if (REGNO(then_op) == REGNO(operands[0]))
+    emit_insn (gen_cmovz (operands[0], operands[2], else_op, operands[0]));
+  else if (REGNO(else_op) == REGNO(operands[0]))
+    emit_insn (gen_cmovn (operands[0], operands[2], then_op, operands[0]));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
+
+;; ----------------------------------------------------------------------------
+;; Conditional Branch patterns
+;; ----------------------------------------------------------------------------
+
+;; Expand SImode conditional branch.  Zero comparisons (except unsigned
+;; ones) map directly onto beqz/bnez/bgez/... forms; every other
+;; comparison is lowered to an slt/slts result in $ta tested against
+;; zero, then re-expanded recursively as an equality branch.
+(define_expand "cbranchsi4"
+  [(set (pc)
+	(if_then_else (match_operator 0 "comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* If operands[2] is (const_int 0),
+     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
+     So we have gcc generate original template rtx.  */
+  if (GET_CODE (operands[2]) == CONST_INT)
+    if (INTVAL (operands[2]) == 0)
+      if ((code != GTU)
+	  && (code != GEU)
+	  && (code != LTU)
+	  && (code != LEU))
+	goto create_template;
+
+  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
+     behavior for the comparison, we might need to generate other
+     rtx patterns to achieve same semantic.  */
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to add 1 to the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of the calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == GT)
+	    {
+	      /* GT, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* GTU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case GE:
+    case GEU:
+      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
+      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == GE)
+	{
+	  /* GE, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* GEU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], EQ);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LT:
+    case LTU:
+      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
+      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
+      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	}
+
+      PUT_CODE (operands[0], NE);
+      operands[1] = tmp_reg;
+      operands[2] = const0_rtx;
+      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				 operands[2], operands[3]));
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[2]) == CONST_INT)
+	{
+	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  /* We want to add 1 to the integer value
+	     of operands[2] to create 'slt' instruction.
+	     This calculation is performed on the host machine,
+	     which may be 64-bit integer.
+	     So the meaning of the calculation result may be
+	     different from the 32-bit nds32 target.
+
+	     For example:
+	       0x7fffffff + 0x1 -> 0x80000000,
+	       this value is POSITIVE on 64-bit machine,
+	       but the expected value on 32-bit nds32 target
+	       should be NEGATIVE value.
+
+	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
+	     explicitly create SImode constant rtx.  */
+	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
+	    }
+
+	  PUT_CODE (operands[0], NE);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+      else
+	{
+	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
+	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
+
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
+	    }
+
+	  PUT_CODE (operands[0], EQ);
+	  operands[1] = tmp_reg;
+	  operands[2] = const0_rtx;
+	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
+				     operands[2], operands[3]));
+
+	  DONE;
+	}
+
+    case EQ:
+    case NE:
+      /* NDS32 ISA has various form for eq/ne behavior no matter
+         what kind of the operand is.
+         So just generate original template rtx.  */
+      goto create_template;
+
+    default:
+      FAIL;
+    }
+
+create_template:
+  do {} while(0); /* dummy line */
+})
+
+
+;; Equality-with-zero conditional branch.  The output form is selected
+;; from the length attribute computed below: 2 = 16-bit branch,
+;; 4 = 32-bit branch, 6/8 = reversed short branch plus an unconditional
+;; 'j' for targets out of direct branch range.
+(define_insn "*cbranchsi4_equality_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-comparison conditional branch has two forms:
+       32-bit instruction =>          beqz/bnez           imm16s << 1
+       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -65500 ~ 65500)
+
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 4:
+      /* including constraints: t, l, and r */
+      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
+    case 6:
+      if (which_alternative == 0)
+	{
+	  /* constraint: t */
+	  if (code == EQ)
+	    {
+	      /*    beqzs8  .L0
+	       *  =>
+	       *    bnezs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnezs8  .L0
+	       *  =>
+	       *    beqzs8  .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else if (which_alternative == 1)
+	{
+	  /* constraint: l */
+	  if (code == EQ)
+	    {
+	      /*    beqz38  $r0, .L0
+	       *  =>
+	       *    bnez38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	  else
+	    {
+	      /*    bnez38  $r0, .L0
+	       *  =>
+	       *    beqz38  $r0, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	    }
+	}
+      else
+	{
+	  /* constraint: r */
+	  /* For which_alternative==2, it should not be here.  */
+	  gcc_unreachable ();
+	}
+    case 8:
+      /* constraint: t, l, r.  */
+      if (code == EQ)
+	{
+	  /*    beqz  $r8, .L0
+	   *  =>
+	   *    bnez  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+      else
+	{
+	  /*    bnez  $r8, .L0
+	   *  =>
+	   *    beqz  $r8, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+			  (le (minus (match_dup 2) (pc)) (const_int  250)))
+		     (if_then_else (match_test "TARGET_16_BIT")
+				   (const_int 2)
+				   (const_int 4))
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					(le (minus (match_dup 2) (pc)) (const_int  65500)))
+				   (const_int 4)
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 6)
+						 (const_int 8))))
+       ;; Alternative 2
+       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; This pattern is dedicated to non-V3 ISAs,
+;; because they HAVE NO beqc/bnec instruction.
+;; Register-register equality branch for pre-V3 ISAs, which lack
+;; beqc/bnec.  An out-of-range target (length 8) is emitted as the
+;; reversed branch over an unconditional 'j'.
+(define_insn "*cbranchsi4_equality_reg"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "!TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+
+     For 32-bit case,
+     we assume it is always reachable. (but check range -16350 ~ 16350).  */
+
+  switch (code)
+    {
+    case EQ:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "beq\t%1, %2, %3";
+	case 8:
+	  /*    beq  $r0, $r1, .L0
+	   *  =>
+	   *    bne  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    case NE:
+      /* r, r */
+      switch (get_attr_length (insn))
+	{
+	case 4:
+	  return "bne\t%1, %2, %3";
+	case 8:
+	  /*    bne  $r0, $r1, .L0
+	   *  =>
+	   *    beq  $r0, $r1, .LCB0
+	   *    j  .L0
+	   *  .LCB0:
+	   */
+	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; This pattern is dedicated to V3,
+;; because V3 DOES HAVE beqc/bnec instruction.
+;; Equality branch for V3, which adds beqc/bnec (register vs. small
+;; constant).  Alternative 0 is register/register (beq/bne, 14-bit
+;; displacement); alternative 1 is register/Is11 constant (beqc/bnec,
+;; 8-bit displacement).  Out-of-range targets use a reversed branch
+;; around an unconditional 'j'.
+(define_insn "*cbranchsi4_equality_reg_or_const_int"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
+			[(match_operand:SI 1 "register_operand"           "r,    r")
+			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+		      (label_ref (match_operand 3 "" ""))
+		      (pc)))]
+  "TARGET_ISA_V3"
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This register-comparison conditional branch has one form:
+       32-bit instruction =>          beq/bne           imm14s << 1
+       32-bit instruction =>         beqc/bnec          imm8s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -16350 ~ 16350 and -250 ~ 250).  */
+
+  switch (code)
+    {
+    case EQ:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beq\t%1, %2, %3";
+	    case 8:
+	      /*    beq  $r0, $r1, .L0
+	       *  =>
+	       *    bne  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "beqc\t%1, %2, %3";
+	    case 8:
+	      /*    beqc  $r0, constant, .L0
+	       *  =>
+	       *    bnec  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    /* No fallthrough can occur above: every inner case returns or aborts.  */
+    case NE:
+      if (which_alternative == 0)
+	{
+	  /* r, r */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bne\t%1, %2, %3";
+	    case 8:
+	      /*    bne  $r0, $r1, .L0
+	       *  =>
+	       *    beq  $r0, $r1, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  /* r, Is11 */
+	  switch (get_attr_length (insn))
+	    {
+	    case 4:
+	      return "bnec\t%1, %2, %3";
+	    case 8:
+	      /*    bnec  $r0, constant, .L0
+	       *  =>
+	       *    beqc  $r0, constant, .LCB0
+	       *    j  .L0
+	       *  .LCB0:
+	       */
+	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
+		     (const_int 4)
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+			  (le (minus (match_dup 3) (pc)) (const_int  250)))
+		     (const_int 4)
+		     (const_int 8))
+     ])])
+
+
+;; Signed compare-against-zero branch: bgtz/bgez/bltz/blez with a 16-bit
+;; displacement.  Out-of-range targets use the reversed condition plus
+;; an unconditional 'j' (length 8 case below).
+(define_insn "*cbranchsi4_greater_less_zero"
+  [(set (pc)
+	(if_then_else (match_operator 0 "nds32_greater_less_comparison_operator"
+			[(match_operand:SI 1 "register_operand" "r")
+			 (const_int 0)])
+		      (label_ref (match_operand 2 "" ""))
+		      (pc)))]
+  ""
+{
+  enum rtx_code code;
+
+  code = GET_CODE (operands[0]);
+
+  /* This zero-greater-less-comparison conditional branch has one form:
+       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
+
+     For 32-bit case, we assume it is always reachable.
+     (but check range -65500 ~ 65500).  */
+
+  if (get_attr_length (insn) == 8)
+    {
+      /* The branch target is too far to simply use one
+         bgtz/bgez/bltz/blez instruction.
+         We need to reverse condition and use 'j' to jump to the target.  */
+      switch (code)
+	{
+	case GT:
+	  /*   bgtz  $r8, .L0
+	   * =>
+	   *   blez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case GE:
+	  /*   bgez  $r8, .L0
+	   * =>
+	   *   bltz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LT:
+	  /*   bltz  $r8, .L0
+	   * =>
+	   *   bgez  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	case LE:
+	  /*   blez  $r8, .L0
+	   * =>
+	   *   bgtz  $r8, .LCB0
+	   *   j  .L0
+	   * .LCB0:
+	   */
+	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  /* Short form: the target is within the 16-bit displacement range.  */
+  switch (code)
+    {
+    case GT:
+      return "bgtz\t%1, %2";
+    case GE:
+      return "bgez\t%1, %2";
+    case LT:
+      return "bltz\t%1, %2";
+    case LE:
+      return "blez\t%1, %2";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type"   "branch")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
+		      (const_int 4)
+		      (const_int 8)))])
+
+
+;; Expand "reg0 = (reg2 <cond> op3)" producing 0/1 in reg0.  Every
+;; condition is reduced to the two set-on-less-than insns below
+;; (slts_compare for signed, slt_compare for unsigned) plus at most one
+;; auxiliary add/xor/movi:
+;;   EQ/NE : subtract (or xor) the operands, then test against 1 / 0.
+;;   GT/GTU: swap the operands of slt.
+;;   GE/GEU: compare against (B - 1), or invert LT with xor 1.
+;;   LT/LTU: direct slt.
+;;   LE/LEU: compare against (B + 1), or invert swapped LT with xor 1.
+(define_expand "cstoresi4"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(match_operator:SI 1 "comparison_operator"
+	  [(match_operand:SI 2 "register_operand" "")
+	   (match_operand:SI 3 "nonmemory_operand" "")]))]
+  ""
+{
+  rtx tmp_reg;
+  enum rtx_code code;
+
+  code = GET_CODE (operands[1]);
+
+  switch (code)
+    {
+    case EQ:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A == const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A == reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, reg_C, const_int_1 */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
+
+	  DONE;
+	}
+
+    case NE:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A != const_int_B)
+	     --> addi reg_C, reg_A, -const_int_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
+	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A != reg_B)
+	     --> xor  reg_C, reg_A, reg_B
+	         slti reg_R, const_int_0, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
+	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
+
+	  DONE;
+	}
+
+    case GT:
+    case GTU:
+      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
+      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
+      if (code == GT)
+	{
+	  /* GT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
+	}
+      else
+	{
+	  /* GTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
+	}
+
+      DONE;
+
+    case GE:
+    case GEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A >= const_int_B)
+	     --> movi reg_C, const_int_B - 1
+	         slt  reg_R, reg_C, reg_A */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) - 1,
+					      SImode)));
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A >= reg_B)
+	     --> slt  reg_R, reg_A, reg_B
+	         xori reg_R, reg_R, const_int_1 */
+	  if (code == GE)
+	    {
+	      /* GE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[2], operands[3]));
+	    }
+	  else
+	    {
+	      /* GEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[2], operands[3]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+    case LT:
+    case LTU:
+      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
+      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
+      if (code == LT)
+	{
+	  /* LT, use slts instruction */
+	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
+	}
+      else
+	{
+	  /* LTU, use slt instruction */
+	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
+	}
+
+      DONE;
+
+    case LE:
+    case LEU:
+      if (GET_CODE (operands[3]) == CONST_INT)
+	{
+	  /* reg_R = (reg_A <= const_int_B)
+	     --> movi reg_C, const_int_B + 1
+	         slt  reg_R, reg_A, reg_C */
+	  tmp_reg = gen_reg_rtx (SImode);
+
+	  emit_insn (gen_movsi (tmp_reg,
+				gen_int_mode (INTVAL (operands[3]) + 1,
+						      SImode)));
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
+	    }
+
+	  DONE;
+	}
+      else
+	{
+	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
+	                                  xori reg_R, reg_R, const_int_1 */
+	  if (code == LE)
+	    {
+	      /* LE, use slts instruction */
+	      emit_insn (gen_slts_compare (operands[0],
+					   operands[3], operands[2]));
+	    }
+	  else
+	    {
+	      /* LEU, use slt instruction */
+	      emit_insn (gen_slt_compare  (operands[0],
+					   operands[3], operands[2]));
+	    }
+
+	  /* perform 'not' behavior */
+	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+
+	  DONE;
+	}
+
+
+    default:
+      gcc_unreachable ();
+    }
+})
+
+
+;; Signed set-on-less-than: reg0 = (op1 <s op2).  Alternatives 0/1 are
+;; 16-bit forms (slts45/sltsi45) restricted by the "t"/"d" register
+;; classes; alternatives 2/3 are the general 32-bit forms.
+;; NOTE(review): operand 1 uses nonmemory_operand but only register
+;; constraints -- reload must copy a constant operand 1 into a register.
+(define_insn "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
+	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slts45\t%1, %2
+   sltsi45\t%1, %2
+   slts\t%0, %1, %2
+   sltsi\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+;; Unsigned set-on-less-than: reg0 = (op1 <u op2).  Mirrors slts_compare
+;; above: alternatives 0/1 are the 16-bit slt45/slti45 forms, 2/3 the
+;; general 32-bit forms.
+(define_insn "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
+		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+  ""
+  "@
+   slt45\t%1, %2
+   slti45\t%1, %2
+   slt\t%0, %1, %2
+   slti\t%0, %1, %2"
+  [(set_attr "type"   "compare,compare,compare,compare")
+   (set_attr "length" "      2,      2,      4,      4")])
+
+
+;; ----------------------------------------------------------------------------
+
+;; Unconditional and other jump instructions.
+
+;; Unconditional jump: 16-bit 'j8' when the target is within -250..250
+;; and the 16-bit ISA is enabled, otherwise 32-bit 'j'.
+(define_insn "jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  ""
+{
+  /* This unconditional jump has two forms:
+       32-bit instruction => j   imm24s << 1
+       16-bit instruction => j8  imm8s << 1
+
+     For 32-bit case,
+     we assume it is always reachable.
+     For 16-bit case,
+     it must satisfy { 255 >= (label - pc) >= -256 } condition.
+     However, since the $pc for nds32 is at the beginning of the instruction,
+     we should leave some length space for current insn.
+     So we use range -250 ~ 250.  */
+  switch (get_attr_length (insn))
+    {
+    case 2:
+      return "j8\t%0";
+    case 4:
+      return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+			   (le (minus (match_dup 0) (pc)) (const_int  250)))
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))
+		      (const_int 4)))])
+
+;; Indirect jump through a register: 16-bit 'jr5' when the 16-bit ISA is
+;; available, otherwise the 32-bit 'jr' form.
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+  ""
+{
+  return TARGET_16_BIT ? "jr5\t%0" : "jr\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Subroutine call instruction returning no value.
+;;   operands[0]: It should be a mem RTX whose address is
+;;                the address of the function.
+;;   operands[1]: It is the number of bytes of arguments pushed as a const_int.
+;;   operands[2]: It is the number of registers used as operands.
+
+;; Expand a call with no return value; the parallel clobbers the link
+;; pointer ($lp).  The concrete *call_register / *call_immediate insns
+;; below match the result.
+(define_expand "call"
+  [(parallel [(call (match_operand 0 "memory_operand" "")
+		    (match_operand 1 "general_operand" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Call through a function address held in a register: 16-bit 'jral5'
+;; when available, otherwise 32-bit 'jral'.  Clobbers $lp.
+(define_insn "*call_register"
+  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  return TARGET_16_BIT ? "jral5\t%0" : "jral\t%0";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+        (if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Direct call to an immediate (symbolic) address: always the 32-bit
+;; 'jal' form.  Clobbers $lp.
+(define_insn "*call_immediate"
+  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%0"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; Subroutine call instruction returning a value.
+;;   operands[0]: It is the hard register in which the value is returned.
+;;   The rest three operands are the same as the
+;;   three operands of the 'call' instruction.
+;;   (but with numbers increased by one)
+
+;; Expand a value-returning call; like "call" above but with the result
+;; register as operand 0.  Clobbers $lp.
+(define_expand "call_value"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (match_operand 1 "memory_operand" "")
+		         (match_operand 2 "general_operand" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  ""
+)
+
+;; Value-returning call through a register: 16-bit 'jral5' when
+;; available, otherwise 32-bit 'jral'.  Clobbers $lp.
+(define_insn "*call_value_register"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "register_operand" "r"))
+		         (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+{
+  return TARGET_16_BIT ? "jral5\t%1" : "jral\t%1";
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "1")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_16_BIT")
+		      (const_int 2)
+		      (const_int 4)))])
+
+;; Value-returning direct call to an immediate (symbolic) address:
+;; always the 32-bit 'jal' form.  Clobbers $lp.
+(define_insn "*call_value_immediate"
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+			 (match_operand 2 "" "")))
+	      (clobber (reg:SI LP_REGNUM))])]
+  ""
+  "jal\t%1"
+  [(set_attr "type"   "branch")
+   (set_attr "length"      "4")])
+
+
+;; prologue and epilogue.
+
+;; Function prologue.  The v3push form is only available on V3/V3M ISA,
+;; which is what TARGET_V3PUSH reflects.
+(define_expand "prologue" [(const_int 0)]
+  ""
+{
+  if (TARGET_V3PUSH)
+    nds32_expand_prologue_v3push ();
+  else
+    nds32_expand_prologue ();
+  DONE;
+})
+
+;; Function epilogue.  The v3pop form is only available on V3/V3M ISA,
+;; which is what TARGET_V3PUSH reflects.
+(define_expand "epilogue" [(const_int 0)]
+  ""
+{
+  if (TARGET_V3PUSH)
+    nds32_expand_epilogue_v3pop ();
+  else
+    nds32_expand_epilogue ();
+  DONE;
+})
+
+
+;; nop instruction.
+
+;; No-operation insn.
+;; NOTE(review): this unconditionally emits the 16-bit 'nop16' with
+;; length 2, even when the 16-bit ISA is disabled -- confirm a plain
+;; 32-bit 'nop' is not needed for !TARGET_16_BIT configurations.
+(define_insn "nop"
+  [(const_int 0)]
+  ""
+  "nop16"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; ----------------------------------------------------------------------------
+;; unspec operation patterns
+;; ----------------------------------------------------------------------------
+
+;; stack push/pop multiple
+
+;; Emit 'push.s' for a multiple-register stack push.  Operands 0/1 are
+;; the Rb/Re range registers, operand 2 is the En4 bitmask selecting
+;; { $fp $gp $lp $sp }.  An empty push (En4 == 0 with Rb == Re == $sp)
+;; produces no assembly at all.
+(define_insn "*stack_push_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_PUSH_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.
+     (output_asm_insn consumes the buffer before the next insn is
+     printed, so a single static buffer suffices here.)  */
+  static char str_buffer[100];
+
+  /* A string to present 'push.s' instruction.  */
+  const char *push_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create push_str string.
+     Note that 'smw.adm $sp,[$sp],$sp,0' means push nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    push_str = "push.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			push_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; Emit 'pop.s' for a multiple-register stack pop; mirror image of
+;; *stack_push_multiple above (same Rb/Re range and En4 bitmask, and
+;; an empty pop produces no assembly).
+(define_insn "*stack_pop_multiple"
+  [(match_parallel 3 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand" "r")
+		   (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_STACK_POP_MULTIPLE)
+     ])]
+  ""
+{
+  /* Because we need to return a string buffer used by output_asm_insn(),
+     we use a static character array to store desired assembly output.  */
+  static char str_buffer[100];
+
+  /* A string to present 'pop.s' instruction.  */
+  const char *pop_str = NULL;
+  /* A string to present Rb and Re operands.  */
+  const char *RbRe_str = NULL;
+  /* The En4 encoding string of the instruction is
+     in the bitwise fashion of following: "fp gp lp sp".  */
+  const char *En4_str[16] =
+    {
+      /* 0:  0  0  0  0 */
+      "",
+      /* 1:  0  0  0  1 */
+      "{ $sp }",
+      /* 2:  0  0  1  0 */
+      "{ $lp }",
+      /* 3:  0  0  1  1 */
+      "{ $lp $sp }",
+      /* 4:  0  1  0  0 */
+      "{ $gp }",
+      /* 5:  0  1  0  1 */
+      "{ $gp $sp }",
+      /* 6:  0  1  1  0 */
+      "{ $gp $lp }",
+      /* 7:  0  1  1  1 */
+      "{ $gp $lp $sp }",
+      /* 8:  1  0  0  0 */
+      "{ $fp }",
+      /* 9:  1  0  0  1 */
+      "{ $fp $sp }",
+      /* 10: 1  0  1  0 */
+      "{ $fp $lp }",
+      /* 11: 1  0  1  1 */
+      "{ $fp $lp $sp }",
+      /* 12: 1  1  0  0 */
+      "{ $fp $gp }",
+      /* 13: 1  1  0  1 */
+      "{ $fp $gp $sp }",
+      /* 14: 1  1  1  0 */
+      "{ $fp $gp $lp }",
+      /* 15: 1  1  1  1 */
+      "{ $fp $gp $lp $sp }"
+    };
+
+  /* Create pop_str string.
+     Note that 'lmw.bim $sp,[$sp],$sp,0' means pop nothing.  */
+  if (INTVAL (operands[2]) == 0
+      && REGNO (operands[0]) == SP_REGNUM
+      && REGNO (operands[1]) == SP_REGNUM)
+    return "";
+  else
+    pop_str = "pop.s\t";
+
+  /* Create RbRe_str string.
+     Note that we need to output ',' character if there exists En4 field.  */
+  if (REGNO (operands[0]) != SP_REGNUM && REGNO (operands[1]) != SP_REGNUM)
+      RbRe_str = (INTVAL (operands[2]) != 0) ? "%0, %1, " : "%0, %1";
+  else
+      RbRe_str = "";
+
+  /* Create complete assembly code string.  */
+  snprintf (str_buffer, sizeof (str_buffer),
+			"%s%s%s",
+			pop_str,
+			RbRe_str,
+			En4_str[INTVAL (operands[2])]);
+
+  /* Return formatted string.  */
+  return str_buffer;
+}
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "4")])
+
+;; stack v3push/v3pop
+
+;; V3/V3M 'v3push' prologue insn.  The output is a fixed template, so
+;; no C fragment is needed.
+(define_insn "*stack_v3push"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3PUSH)
+     ])]
+  ""
+  "v3push\t%0, %1"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; V3/V3M 'v3pop' epilogue insn.  The output is a fixed template, so
+;; no C fragment is needed.
+(define_insn "*stack_v3pop"
+  [(match_parallel 2 ""
+     [(unspec:BLK [(match_operand:SI 0 "register_operand"  "r")
+		   (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_STACK_V3POP)
+     ])]
+  ""
+  "v3pop\t%0, %1"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+
+;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'.
+;; This pattern is designed to distinguish function return
+;; from general indirect_jump pattern so that we can directly
+;; generate 'ret5' for readability.
+
+;; Function-return insn; emits 'ret5' (an alias of 'jr5 $lp') so return
+;; sites are distinguishable from generic indirect jumps in the output.
+(define_insn "unspec_func_return"
+  [(set (pc)
+	(unspec:SI [(reg:SI LP_REGNUM)] UNSPEC_FUNC_RETURN))]
+  ""
+  "ret5"
+  [(set_attr "type"   "misc")
+   (set_attr "length"    "2")])
+
+;; ----------------------------------------------------------------------------
+;; Jump Table patterns
+;; ----------------------------------------------------------------------------
+;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table)
+;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well.
+;;
+;; operands[0]: The index to dispatch on.
+;; operands[1]: The lower bound for indices in the table.
+;; operands[2]: The total range of indices in the table.
+;;              i.e. The largest index minus the smallest one.
+;; operands[3]: A label that precedes the table itself.
+;; operands[4]: A label to jump to if the index has a value outside the bounds.
+;;
+;; We need to create following sequences for jump table code generation:
+;;   A) k <-- (plus (operands[0]) (-operands[1]))
+;;   B) if (gtu k operands[2]) then goto operands[4]
+;;   C) t <-- operands[3]
+;;   D) z <-- (mem (plus (k << 0 or 1 or 2) t))
+;;   E) z <-- t + z (NOTE: This is only required for pc relative jump table.)
+;;   F) jump to target with register t or z
+;;
+;; The steps C, D, E, and F are performed by casesi_internal pattern.
+;; Expand the standard 'casesi' pattern: normalize the index to be
+;; zero-based (step A), bounds-check it against the table range
+;; (step B), then hand steps C-F to casesi_internal.
+(define_expand "casesi"
+  [(match_operand:SI 0 "register_operand"  "r") ; index to jump on
+   (match_operand:SI 1 "immediate_operand" "i") ; lower bound
+   (match_operand:SI 2 "immediate_operand" "i") ; total range
+   (match_operand:SI 3 "" "")                   ; table label
+   (match_operand:SI 4 "" "")]                  ; Out of range label
+  ""
+{
+  rtx reg, test;
+
+  /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
+  if (operands[1] != const0_rtx)
+    {
+      reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_addsi3 (reg, operands[0],
+			     GEN_INT (-INTVAL (operands[1]))));
+      operands[0] = reg;
+    }
+
+  /* Step B: "if (gtu k operands[2]) then goto operands[4]".  */
+  test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]);
+  emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
+				  operands[4]));
+
+  operands[5] = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register operands[5].  */
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       operands[5]));
+  DONE;
+})
+
+;; We are receiving operands from casesi pattern:
+;;
+;; operands[0]: The index that has had the lower bound subtracted.
+;; operands[1]: A label that precedes the table itself.
+;; operands[2]: A temporary register to retrieve value in table.
+;;
+;; We need to perform steps C, D, E, and F:
+;;
+;;   C) t <-- operands[1]
+;;   D) z <-- (mem (plus (operands[0] << m) t))
+;;            m is 2 for normal jump table.
+;;            m is 0, 1, or 2 for pc relative jump table based on diff size.
+;;   E) t <-- z + t (NOTE: This is only required for pc relative jump table.)
+;;   F) Jump to target with register t or z.
+;;
+;; The USE in this pattern is needed to tell flow analysis that this is
+;; a CASESI insn.  It has no other purpose.
+;; Dispatch through the jump table (steps C-F of casesi above); the
+;; actual assembly sequence is produced by the nds32_output_casesi*
+;; helpers depending on whether the table is pc-relative.
+(define_insn "casesi_internal"
+  [(parallel [(set (pc)
+		   (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
+					     (const_int 4))
+				    (label_ref (match_operand 1 "" "")))))
+	      (use (label_ref (match_dup 1)))
+	      (clobber (match_operand:SI 2 "register_operand" ""))
+	      (clobber (reg:SI TA_REGNUM))])]
+  ""
+{
+  if (CASE_VECTOR_PC_RELATIVE)
+    return nds32_output_casesi_pc_relative (operands);
+  else
+    return nds32_output_casesi (operands);
+}
+  [(set_attr "length" "20")
+   (set_attr "type" "alu")])
+
+;; ----------------------------------------------------------------------------
+
+;; Performance Extension
+
+;; Count leading zeros -- performance-extension 'clz' instruction.
+(define_insn "clzsi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "clz\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Signed maximum -- performance-extension 'max' instruction.
+(define_insn "smaxsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smax:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "max\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+;; Signed minimum -- performance-extension 'min' instruction.
+(define_insn "sminsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(smin:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand" " r")))]
+  "TARGET_PERF_EXT"
+  "min\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
+
+;; ----------------------------------------------------------------------------