diff mbox series

[v8,05/12] LoongArch Port: Machine description C files and .h files.

Message ID 20220304071809.3082015-6-xuchenghua@loongson.cn
State New
Headers show
Series Add LoongArch support. | expand

Commit Message

Chenghua Xu March 4, 2022, 7:18 a.m. UTC
From: chenglulu <chenglulu@loongson.cn>

2022-03-04  Chenghua Xu  <xuchenghua@loongson.cn>
	    Lulu Cheng  <chenglulu@loongson.cn>

gcc/
	* config/host-linux.cc: Add LoongArch support.
	* config/loongarch/loongarch-protos.h: New file.
	* config/loongarch/loongarch-tune.h: Likewise.
	* config/loongarch/loongarch.cc: Likewise.
	* config/loongarch/loongarch.h: Likewise.
---
 gcc/config/host-linux.cc                |    2 +
 gcc/config/loongarch/loongarch-protos.h |  240 +
 gcc/config/loongarch/loongarch-tune.h   |   72 +
 gcc/config/loongarch/loongarch.cc       | 6450 +++++++++++++++++++++++
 gcc/config/loongarch/loongarch.h        | 1273 +++++
 5 files changed, 8037 insertions(+)
 create mode 100644 gcc/config/loongarch/loongarch-protos.h
 create mode 100644 gcc/config/loongarch/loongarch-tune.h
 create mode 100644 gcc/config/loongarch/loongarch.cc
 create mode 100644 gcc/config/loongarch/loongarch.h

Comments

Richard Sandiford March 7, 2022, 6:17 p.m. UTC | #1
Hi,

Some comments below, but otherwise it looks good to me.

A few of the comments are about removing hook or macro definitions
that are the same as the default.  Doing that helps people who want
to update a hook interface in future, since there are then fewer
places to adjust.

xuchenghua@loongson.cn writes:
> […]
> +/* Describes how a symbol is used.
> +
> +   SYMBOL_CONTEXT_CALL
> +       The symbol is used as the target of a call instruction.
> +
> +   SYMBOL_CONTEXT_LEA
> +       The symbol is used in a load-address operation.
> +
> +   SYMBOL_CONTEXT_MEM
> +       The symbol is used as the address in a MEM.  */
> +enum loongarch_symbol_context {
> +  SYMBOL_CONTEXT_CALL,
> +  SYMBOL_CONTEXT_LEA,
> +  SYMBOL_CONTEXT_MEM
> +};

It looks like this is unused: loongarch_classify_symbol takes an
argument of this type, but ignores it.

> […]
> +/* Classifies a type of call.
> +
> +   LARCH_CALL_NORMAL
> +	A normal call or call_value pattern.
> +
> +   LARCH_CALL_SIBCALL
> +	A sibcall or sibcall_value pattern.
> +
> +   LARCH_CALL_EPILOGUE
> +	A call inserted in the epilogue.  */
> +enum loongarch_call_type {
> +  LARCH_CALL_NORMAL,
> +  LARCH_CALL_SIBCALL,
> +  LARCH_CALL_EPILOGUE
> +};

This also looks unused.

> +
> +/* Controls the conditions under which certain instructions are split.
> +
> +   SPLIT_IF_NECESSARY
> +	Only perform splits that are necessary for correctness
> +	(because no unsplit version exists).
> +
> +   SPLIT_FOR_SPEED
> +	Perform splits that are necessary for correctness or
> +	beneficial for code speed.
> +
> +   SPLIT_FOR_SIZE
> +	Perform splits that are necessary for correctness or
> +	beneficial for code size.  */
> +enum loongarch_split_type {
> +  SPLIT_IF_NECESSARY,
> +  SPLIT_FOR_SPEED,
> +  SPLIT_FOR_SIZE
> +};

It looks like this is also unused: loongarch_split_move_p takes an
argument of this type, but ignores it.

I think it'd be better to remove these three enums for now and add them
back later if they become useful.

> […]
> +/* RTX costs of various operations on the different architectures.  */
> +struct loongarch_rtx_cost_data
> +{
> +  unsigned short fp_add;
> +  unsigned short fp_mult_sf;
> +  unsigned short fp_mult_df;
> +  unsigned short fp_div_sf;
> +  unsigned short fp_div_df;
> +  unsigned short int_mult_si;
> +  unsigned short int_mult_di;
> +  unsigned short int_div_si;
> +  unsigned short int_div_di;
> +  unsigned short branch_cost;
> +  unsigned short memory_latency;
> +};
> +
> +/* Default RTX cost initializer.  */
> +#define COSTS_N_INSNS(N) ((N) * 4)
> +#define DEFAULT_COSTS				\
> +    .fp_add		= COSTS_N_INSNS (1),	\
> +    .fp_mult_sf		= COSTS_N_INSNS (2),	\
> +    .fp_mult_df		= COSTS_N_INSNS (2),	\
> +    .fp_div_sf		= COSTS_N_INSNS (4),	\
> +    .fp_div_df		= COSTS_N_INSNS (4),	\
> +    .int_mult_si	= COSTS_N_INSNS (1),	\
> +    .int_mult_di	= COSTS_N_INSNS (1),	\
> +    .int_div_si		= COSTS_N_INSNS (1),	\
> +    .int_div_di		= COSTS_N_INSNS (1),	\
> +    .branch_cost	= 2,			\
> +    .memory_latency	= 4

Unfortunately we need to stay within standard C++(11), so the initialisers
can't use “.name = value” notation.

> […]
> +/* Classifies an address.
> +
> +   ADDRESS_REG
> +       A natural register + offset address.  The register satisfies
> +       loongarch_valid_base_register_p and the offset is a const_arith_operand.
> +
> +   ADDRESS_REG_REG
> +       A base register indexed by (optionally scaled) register.
> +
> +   ADDRESS_CONST_INT
> +       A signed 16-bit constant address.
> +
> +   ADDRESS_SYMBOLIC:
> +       A constant symbolic address.  */
> +enum loongarch_address_type
> +{
> +  ADDRESS_REG,
> +  ADDRESS_REG_REG,
> +  ADDRESS_CONST_INT,
> +  ADDRESS_SYMBOLIC
> +};
> +
> +
> +/* Information about an address described by loongarch_address_type.
> +
> +   ADDRESS_CONST_INT
> +       No fields are used.
> +
> +   ADDRESS_REG
> +       REG is the base register and OFFSET is the constant offset.
> +
> +   ADDRESS_SYMBOLIC
> +       SYMBOL_TYPE is the type of symbol that the address references.  */
> +struct loongarch_address_info
> +{
> +  enum loongarch_address_type type;
> +  rtx reg;
> +  rtx offset;
> +  enum loongarch_symbol_type symbol_type;
> +};

It'd be worth documenting what the fields mean for ADDRESS_REG_REG too.
It looks like OFFSET is the index register in that case.

> […]
> +/* The value of TARGET_ATTRIBUTE_TABLE.  */
> +static const struct attribute_spec loongarch_attribute_table[] = {
> +    /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
> +       affects_type_identity, handler, exclude }  */
> +      { NULL, 0, 0, false, false, false, false, NULL, NULL }
> +};

I think it'd be better to leave this and TARGET_ATTRIBUTE_TABLE
undefined until a non-terminator entry is needed.

> […]
> +/* See whether TYPE is a record whose fields should be returned in one or
> +   two floating-point registers.  If so, populate FIELDS accordingly.  */
> +
> +static unsigned
> +loongarch_pass_aggregate_in_fpr_pair_p (const_tree type,
> +

_p usually means a predicate that returns bool, and the name doesn't
really match the behaviour of the function (which as the comment says,
checks for single FPRs as well as pairs).

Maybe loongarch_pass_aggregate_num_fprs or something would be better.

> […]
> +/* See whether TYPE is a record whose fields should be returned in one or

typo: s/one or/one/

> +   floating-point register and one integer register.  If so, populate
> +   FIELDS accordingly.  */
> +
> +static bool
> +loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
> +					   loongarch_aggregate_field fields[2])
> +{
> +  unsigned num_int = 0, num_float = 0;
> +  int n = loongarch_flatten_aggregate_argument (type, fields);
> +
> +  for (int i = 0; i < n; i++)
> +    {
> +      num_float += SCALAR_FLOAT_TYPE_P (fields[i].type);
> +      num_int += INTEGRAL_TYPE_P (fields[i].type);
> +    }
> +
> +  return num_int == 1 && num_float == 1;
> +}
> […]
> +static void
> +loongarch_emit_stack_tie (void)
> +{
> +  if (Pmode == SImode)
> +    emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
> +  else
> +    emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));

These days it's possible to write this as:

  emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx, hard_frame_pointer_rtx));

with the stack_tie insn being defined using an “@” at the start of the name:

  (define_insn "@stack_tie<mode>"

Same for the later tls patterns.

> +}
> […]
> +/* Expand the "prologue" pattern.  */
> +
> +void
> +loongarch_expand_prologue (void)
> +{
> +  struct loongarch_frame_info *frame = &cfun->machine->frame;
> +  HOST_WIDE_INT size = frame->total_size;
> +  HOST_WIDE_INT tmp;
> +  unsigned mask = frame->mask;
> +  rtx insn;
> +
> +  if (flag_stack_usage_info)
> +    current_function_static_stack_size = size;
> +
> +  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
> +      || flag_stack_clash_protection)
> +    {
> +      if (crtl->is_leaf && !cfun->calls_alloca)
> +	{
> +	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
> +	    {
> +	      tmp = size - get_stack_check_protect ();
> +	      loongarch_emit_probe_stack_range (get_stack_check_protect (),
> +						tmp);
> +	    }
> +	}
> +      else if (size > 0)
> +	loongarch_emit_probe_stack_range (get_stack_check_protect (), size);
> +    }
> +
> +  /* Save the registers.  */
> +  if ((frame->mask | frame->fmask) != 0)
> +    {
> +      HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame));
> +
> +      insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
> +			    GEN_INT (-step1));
> +      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
> +      size -= step1;
> +      loongarch_for_each_saved_reg (size, loongarch_save_reg);
> +    }
> +
> +  frame->mask = mask; /* Undo the above fib.  */

Is this and the “unsigned mask = frame->mask” still needed?
I couldn't see where frame->mask was changed.

> […]
> +/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
> +   says which.  */
> +
> +void
> +loongarch_expand_epilogue (bool sibcall_p)
> +{
> +  /* Split the frame into two.  STEP1 is the amount of stack we should
> +     deallocate before restoring the registers.  STEP2 is the amount we
> +     should deallocate afterwards.
> +
> +     Start off by assuming that no registers need to be restored.  */
> +  struct loongarch_frame_info *frame = &cfun->machine->frame;
> +  HOST_WIDE_INT step1 = frame->total_size;
> +  HOST_WIDE_INT step2 = 0;
> +  rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
> +  rtx insn;
> +
> +  /* We need to add memory barrier to prevent read from deallocated stack.  */
> +  bool need_barrier_p
> +    = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
> +
> +  if (!sibcall_p && loongarch_can_use_return_insn ())
> +    {
> +      emit_jump_insn (gen_return ());
> +      return;
> +    }
> +
> +  /* Move past any dynamic stack allocations.  */
> +  if (cfun->calls_alloca)
> +    {
> +      /* Emit a barrier to prevent loads from a deallocated stack.  */
> +      loongarch_emit_stack_tie ();
> +      need_barrier_p = false;
> +
> +      rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
> +      if (!IMM12_OPERAND (INTVAL (adjust)))
> +	{
> +	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
> +	  adjust = LARCH_PROLOGUE_TEMP (Pmode);
> +	}
> +
> +      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
> +				       hard_frame_pointer_rtx,
> +				       adjust));
> +
> +      rtx dwarf = NULL_RTX;
> +      rtx minus_offset = NULL_RTX;
> +      minus_offset = GEN_INT (-frame->hard_frame_pointer_offset);

The minus_offset = NULL_RTX looks redundant, might as well initialise
with the GEN_INT instead.

> […]
> +/* Return true if SYMBOL_REF X is associated with a global symbol
> +   (in the STB_GLOBAL sense).  */
> +
> +bool
> +loongarch_global_symbol_p (const_rtx x)
> +{
> +  if (GET_CODE (x) == LABEL_REF)
> +    return false;
> +
> +  const_tree decl = SYMBOL_REF_DECL (x);
> +
> +  if (!decl)
> +    return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
> +
> +  /* Weakref symbols are not TREE_PUBLIC, but their targets are global
> +     or weak symbols.  Relocations in the object file will be against
> +     the target symbol, so it's that symbol's binding that matters here.  */
> +  return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl));
> +}
> +
> +bool
> +loongarch_global_symbol_noweak_p (const_rtx x)
> +{
> +  if (GET_CODE (x) == LABEL_REF)
> +    return false;
> +
> +  const_tree decl = SYMBOL_REF_DECL (x);
> +
> +  if (!decl)
> +    return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
> +
> +  /* Weakref symbols are not TREE_PUBLIC, but their targets are global
> +     or weak symbols.  Relocations in the object file will be against
> +     the target symbol, so it's that symbol's binding that matters here.  */

This comment doesn't apply to the noweak function.

> +  return DECL_P (decl) && TREE_PUBLIC (decl);
> +}
> […]
> +/* Return the method that should be used to access SYMBOL_REF or
> +   LABEL_REF X in context CONTEXT.  */
> +
> +static enum loongarch_symbol_type
> +loongarch_classify_symbol (const_rtx x,
> +			   enum loongarch_symbol_context context  \
> +			   ATTRIBUTE_UNUSED)
> +{
> +  if (GET_CODE (x) == LABEL_REF)
> +    {
> +      return SYMBOL_GOT_DISP;
> +    }

Minor formatting nit: redundant braces.  Some other instances
later in the file.

> +
> +  gcc_assert (GET_CODE (x) == SYMBOL_REF);
> +
> +  if (SYMBOL_REF_TLS_MODEL (x))
> +    return SYMBOL_TLS;
> +
> +  if (GET_CODE (x) == SYMBOL_REF)
> +    return SYMBOL_GOT_DISP;
> +
> +  return SYMBOL_GOT_DISP;
> +}
> +
> +/* Return true if X is a symbolic constant that can be used in context
> +   CONTEXT.  If it is, store the type of the symbol in *SYMBOL_TYPE.  */
> +
> +bool
> +loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context,
> +			       enum loongarch_symbol_type *symbol_type)
> +{
> +  rtx offset;
> +
> +  split_const (x, &x, &offset);
> +  if (UNSPEC_ADDRESS_P (x))
> +    {
> +      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
> +      x = UNSPEC_ADDRESS (x);
> +    }
> +  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
> +    {
> +      *symbol_type = loongarch_classify_symbol (x, context);
> +      if (*symbol_type == SYMBOL_TLS)
> +	return true;
> +    }
> +  else
> +    return false;
> +
> +  if (offset == const0_rtx)
> +    return true;
> +
> +  /* Check whether a nonzero offset is valid for the underlying
> +     relocations.  */
> +  switch (*symbol_type)
> +    {
> +      /* Fall through.  */
> +

There are no longer any previous cases to fall through from.

> +    case SYMBOL_GOT_DISP:
> +    case SYMBOL_TLSGD:
> +    case SYMBOL_TLSLDM:
> +    case SYMBOL_TLS:
> +      return false;
> +    }
> +  gcc_unreachable ();
> +}
> +
> +/* Like loongarch_symbol_insns We rely on the fact that, in the worst case.  */

This comment seems incomplete.

> +
> +static int
> +loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode)
> +{
> +  switch (type)
> +    {
> +    case SYMBOL_GOT_DISP:
> +      /* The constant will have to be loaded from the GOT before it
> +	 is used in an address.  */
> +      if (mode != MAX_MACHINE_MODE)
> +	return 0;
> +
> +      /* Fall through.  */

No (case) fall through here.  I guess the comment should just be deleted.

> +
> +      return 3;
> +
> +    case SYMBOL_TLSGD:
> +    case SYMBOL_TLSLDM:
> +      return 1;
> +
> +    case SYMBOL_TLS:
> +      /* We don't treat a bare TLS symbol as a constant.  */
> +      return 0;
> +    }
> +  gcc_unreachable ();
> +}
> +
> +/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed
> +   to load symbols of type TYPE into a register.  Return 0 if the given
> +   type of symbol cannot be used as an immediate operand.
> +
> +   Otherwise, return the number of instructions needed to load or store
> +   values of mode MODE to or from addresses of type TYPE.  Return 0 if
> +   the given type of symbol is not valid in addresses.
> +
> +   In both cases, instruction counts are based off BASE_INSN_LENGTH.  */
> +
> +static int
> +loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
> +{
> +  return loongarch_symbol_insns_1 (type, mode) * (1);

“* (1)” looks odd, maybe just remove.

> +}
> […]
> +/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
> +   can address a value of mode MODE.  */
> +
> +static bool
> +loongarch_valid_offset_p (rtx x, machine_mode mode)
> +{
> +  /* Check that X is a signed 12-bit number,
> +   * or check that X is a signed 16-bit number
> +   * and offset 4 byte aligned.  */

Minor nit, but it isn't GNU style to indent comment lines with a leading “*”.
(This was the only instance I could see in the patch.)

> +  if (!(const_arith_operand (x, Pmode)
> +	|| ((mode == E_SImode || mode == E_DImode)
> +	    && const_imm16_operand (x, Pmode)
> +	    && (loongarch_signed_immediate_p (INTVAL (x), 14, 2)))))
> +    return false;
> +
> +  /* We may need to split multiword moves, so make sure that every word
> +     is accessible.  */
> +  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
> +      && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
> +    return false;
> +
> +  return true;
> +}
> +
> […]
> +/* Return the number of instructions needed to load constant X,
> +   assuming that BASE_INSN_LENGTH is the length of one instruction.
> +   Return 0 if X isn't a valid constant.  */
> +
> +int
> +loongarch_const_insns (rtx x)
> +{
> +  enum loongarch_symbol_type symbol_type;
> +  rtx offset;
> +
> +  switch (GET_CODE (x))
> +    {
> +    case CONST_INT:
> +      return loongarch_integer_cost (INTVAL (x));
> +
> +    case CONST_VECTOR:
> +      /* Fall through.  */
> +    case CONST_DOUBLE:
> +      /* Allow zeros for normal mode, where we can use $0.  */

“normal mode” is a holdover from MIPS (where it meant “not MIPS16
and not micromips”).

> +      return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
> +
> +    case CONST:
> +      /* See if we can refer to X directly.  */
> +      if (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_LEA, &symbol_type))
> +	return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE);
> +
> +      /* Otherwise try splitting the constant into a base and offset.
> +	 If the offset is a 12-bit value, we can load the base address
> +	 into a register and then use ADDI.{W/D} to add in the offset.
> +	 If the offset is larger, we can load the base and offset
> +	 into separate registers and add them together with ADD.{W/D}.
> +	 However, the latter is only possible before reload; during
> +	 and after reload, we must have the option of forcing the
> +	 constant into the pool instead.  */
> +      split_const (x, &x, &offset);
> +      if (offset != 0)
> +	{
> +	  int n = loongarch_const_insns (x);
> +	  if (n != 0)
> +	    {
> +	      if (IMM12_INT (offset))
> +		return n + 1;
> +	      else if (!targetm.cannot_force_const_mem (GET_MODE (x), x))
> +		return n + 1 + loongarch_integer_cost (INTVAL (offset));
> +	    }
> +	}
> +      return 0;
> +
> +    case SYMBOL_REF:
> +    case LABEL_REF:
> +      return loongarch_symbol_insns (
> +	loongarch_classify_symbol (x, SYMBOL_CONTEXT_LEA), MAX_MACHINE_MODE);
> +
> +    default:
> +      return 0;
> +    }
> +}
> +
> +/* X is a doubleword constant that can be handled by splitting it into
> +   two words and loading each word separately.  Return the number of
> +   instructions required to do this, assuming that BASE_INSN_LENGTH
> +   is the length of one instruction.  */

The BASE_INSN_LENGTH part no longer applies.  Same for later functions.

> […]
> +/* Return true if there is a instruction that implements CODE
> +   and if that instruction accepts X as an immediate operand.  */
> +
> +static int
> +loongarch_immediate_operand_p (int code, HOST_WIDE_INT x)
> +{
> +  switch (code)
> +    {
> +    case ASHIFT:
> +    case ASHIFTRT:
> +    case LSHIFTRT:
> +      /* All shift counts are truncated to a valid constant.  */
> +      return true;
> +
> +    case ROTATE:
> +    case ROTATERT:
> +      /* Likewise rotates, if the target supports rotates at all.  */

The comment doesn't really apply to Loongson, where rotates are
unconditionally available.

> […]
> +/* Return the cost of sign-extending OP to mode MODE, not including the
> +   cost of OP itself.  */
> +
> +static int
> +loongarch_sign_extend_cost (machine_mode mode, rtx op)
> +{
> +  if (MEM_P (op))
> +    /* Extended loads are as cheap as unextended ones.  */
> +    return 0;
> +
> +  if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode)
> +    /* A sign extension from SImode to DImode in 64-bit mode is free.  */
> +    return 0;

Coming back to the TRULY_NOOP_TRUNCATION question from patch 4:
this would no longer be true if the definition of TRULY_NOOP_TRUNCATION
were removed.  On balance, though, I'd expect removing the definition
to improve the quality of the generated code.

> +
> +  return COSTS_N_INSNS (1);
> +}
> […]
> +/* Return one word of double-word value OP, taking into account the fixed
> +   endianness of certain registers.  HIGH_P is true to select the high part,
> +   false to select the low part.  */
> +
> +rtx
> +loongarch_subword (rtx op, bool high_p)
> +{
> +  unsigned int byte, offset;
> +  machine_mode mode;
> +
> +  mode = GET_MODE (op);
> +  if (mode == VOIDmode)
> +    mode = TARGET_64BIT ? TImode : DImode;
> +
> +  if (high_p)
> +    byte = UNITS_PER_WORD;
> +  else
> +    byte = 0;
> +
> +  if (FP_REG_RTX_P (op))
> +    {
> +      /* Paired FPRs are always ordered little-endian.  */
> +      offset = (UNITS_PER_WORD < UNITS_PER_HWFPVALUE ? high_p : byte != 0);
> +      return gen_rtx_REG (word_mode, REGNO (op) + offset);
> +    }

Do you need this?  Loongson seems to be little-endian only, so this…

> +
> +  if (MEM_P (op))
> +    return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte));
> +
> +  return simplify_gen_subreg (word_mode, op, mode, byte);

…default behaviour should work.

> +}
> +
> […]
> +/* Forward declaration.  Used below.  */
> +static HOST_WIDE_INT
> +loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align);

It'd be better to move the definition further up, since there isn't a cyclic
dependency.

> […]
> +}
> +/* Return the appropriate instructions to move SRC into DEST.  Assume
> +   that SRC is operand 1 and DEST is operand 0.  */

Nit: missing newline before the comment.

> +
> +const char *
> +loongarch_output_move (rtx dest, rtx src)
> +{
> […]
> +      if (symbolic_operand (src, VOIDmode))
> +	{
> +	  if ((TARGET_CMODEL_TINY && (!loongarch_global_symbol_p (src)
> +				      || loongarch_symbol_binds_local_p (src)))
> +	      || (TARGET_CMODEL_TINY_STATIC && !loongarch_weak_symbol_p (src)))
> +	    {
> +	      /* The symbol must be aligned to 4 byte.  */
> +	      unsigned int align;
> +
> +	      if (GET_CODE (src) == LABEL_REF)
> +		align = 128 /* Whatever.  */;

128 seems a strange choice.  Couldn't it just be 32 (the number of bits
in an instruction) given the later use?

> +	      else if (CONSTANT_POOL_ADDRESS_P (src))
> +		align = GET_MODE_ALIGNMENT (get_pool_mode (src));
> +	      else if (TREE_CONSTANT_POOL_ADDRESS_P (src))
> +		{
> +		  tree exp = SYMBOL_REF_DECL (src);
> +		  align = TYPE_ALIGN (TREE_TYPE (exp));
> +		  align = loongarch_constant_alignment (exp, align);
> +		}
> +	      else if (SYMBOL_REF_DECL (src))
> +		align = DECL_ALIGN (SYMBOL_REF_DECL (src));
> +	      else if (SYMBOL_REF_HAS_BLOCK_INFO_P (src)
> +		       && SYMBOL_REF_BLOCK (src) != NULL)
> +		align = SYMBOL_REF_BLOCK (src)->alignment;
> +	      else
> +		align = BITS_PER_UNIT;
> +
> +	      if (align % (4 * 8) == 0)
> +		return "pcaddi\t%0,%%pcrel(%1)>>2";
> +	    }
> +	  if (TARGET_CMODEL_TINY
> +	      || TARGET_CMODEL_TINY_STATIC
> +	      || TARGET_CMODEL_NORMAL
> +	      || TARGET_CMODEL_LARGE)
> +	    {
> +	      if (!loongarch_global_symbol_p (src)
> +		  || loongarch_symbol_binds_local_p (src))
> +		return "la.local\t%0,%1";
> +	      else
> +		return "la.global\t%0,%1";
> +	    }
> +	  if (TARGET_CMODEL_EXTREME)
> +	    {
> +	      sorry ("Normal symbol loading not implemented in extreme mode.");
> +	      /* GCC complains.  */
> +	      /* return ""; */

Similar comments here to the review of part 4: sorry () isn't a fatal error,
so the function still needs to return something.  Is this a situation
that can be triggered by specific source code, or is it an internal
error that the user should never see?

> […]
> +/* Like riscv_emit_int_compare, but for floating-point comparisons.  */

s/riscv/loongarch/

> +
> +static void
> +loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
> +{
> […]
> +/* Implement TARGET_ASM_FUNCTION_RODATA_SECTION.
> +
> +   The complication here is that, with the combination
> +   !TARGET_ABSOLUTE_ABICALLS , jump tables will use
> +   absolute addresses, and should therefore not be included in the
> +   read-only part of a DSO.  Handle such cases by selecting a normal
> +   data section instead of a read-only one.  The logic apes that in
> +   default_function_rodata_section.  */

The !TARGET_ABSOLUTE_ABICALLS bit doesn't apply to Loongson.
I think you can just say “The complication here is that jump tables…”.

> +
> +static section *
> +loongarch_function_rodata_section (tree decl, bool)
> +{
> +  return default_function_rodata_section (decl, false);
> +}
> […]
> +/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P.  We don't want to use
> +   anchors for small data: the GP register acts as an anchor in that
> +   case.  We also don't want to use them for PC-relative accesses,
> +   where the PC acts as an anchor.  */
> +
> +static bool
> +loongarch_use_anchors_for_symbol_p (const_rtx symbol)
> +{
> +  return default_use_anchors_for_symbol_p (symbol);
> +}

The comment doesn't describe the behaviour.  Probably easiest to leave
this hook undefined.

> […]
> +/* Implement TARGET_DWARF_FRAME_REG_MODE.  */
> +
> +static machine_mode
> +loongarch_dwarf_frame_reg_mode (int regno)
> +{
> +  machine_mode mode = default_dwarf_frame_reg_mode (regno);
> +
> +  return mode;
> +}

This is the default behaviour, so it can be deleted.

> […]
> +#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
> +extern int size_directive_output;
> +
> +/* Implement ASM_DECLARE_OBJECT_NAME.  This is like most of the standard ELF
> +   definitions except that it uses loongarch_declare_object to emit the label.
> +*/
> +
> +void
> +loongarch_declare_object_name (FILE *stream, const char *name,
> +			       tree decl ATTRIBUTE_UNUSED)
> +{
> +#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
> +#ifdef USE_GNU_UNIQUE_OBJECT

Are these preprocessor conditions necessary for Loongson?  I would have
expected things to be more regular than they were for MIPS.

(Also applies to the ASM_OUTPUT_SIZE_DIRECTIVE condition above,
and to similar conditions later.)

> +  /* As in elfos.h.  */
> +  if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (decl)
> +      && (!DECL_ARTIFICIAL (decl) || !TREE_READONLY (decl)))
> +    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "gnu_unique_object");
> +  else
> +#endif
> +    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
> +#endif
> +
> +  size_directive_output = 0;
> +  if (!flag_inhibit_size_directive && DECL_SIZE (decl))
> +    {
> +      HOST_WIDE_INT size;
> +
> +      size_directive_output = 1;
> +      size = int_size_in_bytes (TREE_TYPE (decl));
> +      ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
> +    }
> +
> +  loongarch_declare_object (stream, name, "", ":\n");
> +}
> […]
> +/* Implement TARGET_ASM_FILE_START.  */
> +
> +static void
> +loongarch_file_start (void)
> +{
> +  default_file_start ();
> +}

This is the default behaviour, so it can be deleted.

> […]
> +bool
> +loongarch_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
> +				unsigned int new_reg ATTRIBUTE_UNUSED)
> +{
> +  return true;
> +}

Same for this.

> […]
> +/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
> +
> +static bool
> +loongarch_can_change_mode_class (machine_mode from, machine_mode to,
> +				 reg_class_t rclass)
> +{
> +  /* Allow conversions between different Loongson integer vectors,
> +     and between those vectors and DImode.  */
> +  if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8
> +      && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
> +    return true;

Is the comment accurate?  There didn't seem to be any integer vector support.

> +
> +  return !reg_classes_intersect_p (FP_REGS, rclass);
> +}
> […]
> +/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
> +
> +   This can be achieved using MOVFRH2GR.S/MOVGR2FRH.W when these instructions
> +   are available but otherwise moves must go via memory.  Using
> +   MOVGR2FR/MOVFR2GR to access the lower-half of these registers would require
> +   a forbidden single-precision access.  We require all double-word moves to
> +   use memory because adding even and odd floating-point registers classes
> +   would have a significant impact on the backend.  */
> +
> +static bool
> +loongarch_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
> +				   reg_class_t class1, reg_class_t class2)
> +{
> +  /* Ignore spilled pseudos.  */
> +  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
> +    return false;
> +
> +  return false;
> +}

Looks like this hook definition can be deleted (which is a good sign!)

> […]
> +/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
> +
> +static machine_mode
> +loongarch_preferred_simd_mode (scalar_mode mode ATTRIBUTE_UNUSED)
> +{
> +  return word_mode;
> +}

This is the default behaviour, so it can be deleted.

> […]
> +/* Return the number of instructions that can be issued per cycle.  */
> +
> +static int
> +loongarch_issue_rate (void)
> +{
> +  if ((unsigned long) __ACTUAL_TUNE < N_TUNE_TYPES)
> +    return loongarch_cpu_issue_rate[__ACTUAL_TUNE];
> +  else
> +    return 1;
> +}

Following on from a comment about patch 2: __FOO macros are in the
implementation namespace, so __ACTUAL_TUNE should be something like
LARCH_ACTUAL_TUNE instead.

> […]
> +/* A structure representing the state of the processor pipeline.
> +   Used by the loongarch_sim_* family of functions.  */
> +
> +struct loongarch_sim
> +{
> +  /* The maximum number of instructions that can be issued in a cycle.
> +     (Caches loongarch_issue_rate.)  */
> +  unsigned int issue_rate;
> +
> +  /* The current simulation time.  */
> +  unsigned int time;
> +
> +  /* How many more instructions can be issued in the current cycle.  */
> +  unsigned int insns_left;
> +
> +  /* LAST_SET[X].INSN is the last instruction to set register X.
> +     LAST_SET[X].TIME is the time at which that instruction was issued.
> +     INSN is null if no instruction has yet set register X.  */
> +  struct
> +  {
> +    rtx_insn *insn;
> +    unsigned int time;
> +  } last_set[FIRST_PSEUDO_REGISTER];
> +
> +  /* The pipeline's current DFA state.  */
> +  state_t dfa_state;
> +};
> +
> +/* Reset STATE to the initial simulation state.  */
> +
> +static void
> +loongarch_sim_reset (struct loongarch_sim *state)
> +{
> +  curr_state = state->dfa_state;
> +
> +  state->time = 0;
> +  state->insns_left = state->issue_rate;
> +  memset (&state->last_set, 0, sizeof (state->last_set));
> +  state_reset (curr_state);
> +
> +  targetm.sched.init (0, false, 0);
> +  advance_state (curr_state);
> +}
> +
> +/* Initialize STATE before its first use.  DFA_STATE points to an
> +   allocated but uninitialized DFA state.  */
> +
> +static void
> +loongarch_sim_init (struct loongarch_sim *state, state_t dfa_state)
> +{
> +  if (targetm.sched.init_dfa_pre_cycle_insn)
> +    targetm.sched.init_dfa_pre_cycle_insn ();
> +
> +  if (targetm.sched.init_dfa_post_cycle_insn)
> +    targetm.sched.init_dfa_post_cycle_insn ();
> +
> +  state->issue_rate = loongarch_issue_rate ();
> +  state->dfa_state = dfa_state;
> +  loongarch_sim_reset (state);
> +}
> +
> +/* Set up costs based on the current architecture and tuning settings.  */
> +
> +static void
> +loongarch_set_tuning_info (void)
> +{
> +  dfa_start ();
> +
> +  struct loongarch_sim state;
> +  loongarch_sim_init (&state, alloca (state_size ()));
> +
> +  dfa_finish ();
> +}
> +/* Implement TARGET_EXPAND_TO_RTL_HOOK.  */
> +
> +static void
> +loongarch_expand_to_rtl_hook (void)
> +{
> +  /* We need to call this at a point where we can safely create sequences
> +     of instructions, so TARGET_OVERRIDE_OPTIONS is too early.  We also
> +     need to call it at a point where the DFA infrastructure is not
> +     already in use, so we can't just call it lazily on demand.
> +
> +     At present, loongarch_tuning_info is only needed during post-expand
> +     RTL passes such as split_insns, so this hook should be early enough.
> +     We may need to move the call elsewhere if loongarch_tuning_info starts
> +     to be used for other things (such as rtx_costs, or expanders that
> +     could be called during gimple optimization).  */
> +
> +  loongarch_set_tuning_info ();
> +}

Do you need the code above?  It doesn't seem to be using the
simulator state.

IIRC, on MIPS this was originally used to pad instructions in a
VLIW-like way for NEC's VR4131.

> […]
> +static void
> +loongarch_option_override_internal (struct gcc_options *opts,
> +				    struct gcc_options *opts_set);

Here too it would be better to rearrange the functions to avoid
the forward declaration.

> +
> +/* Implement TARGET_OPTION_OVERRIDE.  */
> +
> +static void
> +loongarch_option_override (void)
> +{
> +  loongarch_option_override_internal (&global_options, &global_options_set);
> +}
> +
> +static void
> +loongarch_option_override_internal (struct gcc_options *opts,
> +				    struct gcc_options *opts_set)
> +{
> +  int i, regno, mode;
> +
> +  (void) opts_set;

Might as well just drop the argument name from the prototype instead.

> +
> +  if (flag_pic)
> +    g_switch_value = 0;
> +
> +  /* Handle target-specific options: compute defaults/conflicts etc.  */
> +  loongarch_config_target (&la_target, la_opt_switches,
> +			   la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu,
> +			   la_opt_abi_base, la_opt_abi_ext, la_opt_cmodel, 0);
> +
> +  /* End of code shared with GAS.  */

Just checking: is this code shared with GAS, in the way that the
MIPS code was?  (Genuine question, since it might be.)

> […]
> +/* Implement TARGET_SHIFT_TRUNCATION_MASK.  We want to keep the default
> +   behavior of TARGET_SHIFT_TRUNCATION_MASK for non-vector modes.  */
> +
> +static unsigned HOST_WIDE_INT
> +loongarch_shift_truncation_mask (machine_mode mode)
> +{
> +  return GET_MODE_BITSIZE (mode) - 1;
> +}

It shouldn't be necessary to define this hook, given that
SHIFT_COUNT_TRUNCATED is 1.  (The reason MIPS had to define
it was that vector shifts didn't truncate in the same way.)

> +
> +/* Implement TARGET_SCHED_REASSOCIATION_WIDTH.  */
> +
> +static int
> +loongarch_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
> +				     machine_mode mode ATTRIBUTE_UNUSED)
> +{
> +  return 1;
> +}

This also shouldn't be necessary, since it matches the default.

> […]
> +/* Implement TARGET_CASE_VALUES_THRESHOLD.  */
> +
> +unsigned int
> +loongarch_case_values_threshold (void)
> +{
> +  return default_case_values_threshold ();
> +}

This is also the default.

> […]
> +/* All our function attributes are related to how out-of-line copies should
> +   be compiled or called.  They don't in themselves prevent inlining.  */
> +#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
> +#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

This shouldn't be necessary, since there are no target-specific
attributes.

> […]
> diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
> new file mode 100644
> index 00000000000..791f9d064c5
> --- /dev/null
> +++ b/gcc/config/loongarch/loongarch.h
> @@ -0,0 +1,1273 @@
> +/* Definitions of target machine for GNU compiler.  LoongArch version.
> +   Copyright (C) 2021-2022 Free Software Foundation, Inc.
> +   Contributed by Loongson Ltd.
> +   Based on MIPS and RISC-V target for GNU compiler.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +/* LoongArch external variables defined in loongarch.c.  */

longarch.cc now :-)

> […]
> +/* Support for a compile-time default CPU, et cetera.  The rules are:
> +   --with-divide is ignored if -mdivide-traps or -mdivide-breaks are
> +     specified.  */
> +#define OPTION_DEFAULT_SPECS \
> +  {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}"},

Loongson doesn't have the -mdivide-* options, so I think this can
be deleted.

> […]
> +/* This definition replaces the formerly used 'm' constraint with a
> +   different constraint letter in order to avoid changing semantics of
> +   the 'm' constraint when accepting new address formats in
> +   TARGET_LEGITIMATE_ADDRESS_P.  The constraint letter defined here
> +   must not be used in insn definitions or inline assemblies.  */
> +#define TARGET_MEM_CONSTRAINT 'w'

Do you need to do this for a new port like Loongson?  It looks like
TARGET_LEGITIMATE_ADDRESS_P accepts all valid forms of address,
including reg+reg (good!), so shouldn't "m" accept them too?

If there is already existing code that assumes that "m" is never indexed
then the definition obviously makes sense.  But if you don't know of any
such code then it would be better to make "m" accept all the things that
TARGET_LEGITIMATE_ADDRESS_P does (by removing this definition).

> +
> +/* Tell collect what flags to pass to nm.  */
> +#ifndef NM_FLAGS
> +#define NM_FLAGS "-Bn"
> +#endif
> +
> +/* SUBTARGET_ASM_DEBUGGING_SPEC handles passing debugging options to
> +   the assembler.  It may be overridden by subtargets.  */
> +
> +#ifndef SUBTARGET_ASM_DEBUGGING_SPEC
> +#define SUBTARGET_ASM_DEBUGGING_SPEC "\
> +%{g} %{g0} %{g1} %{g2} %{g3} \
> +%{ggdb:-g} %{ggdb0:-g0} %{ggdb1:-g1} %{ggdb2:-g2} %{ggdb3:-g3} \
> +%{gstabs:-g} %{gstabs0:-g0} %{gstabs1:-g1} %{gstabs2:-g2} %{gstabs3:-g3} \
> +%{gstabs+:-g} %{gstabs+0:-g0} %{gstabs+1:-g1} %{gstabs+2:-g2} %{gstabs+3:-g3}"

It doesn't look like this and %(subtarget_asm_debugging_spec)
(from EXTRA_SPECS) are used.  That's a good thing, since the stabs
stuff shouldn't be needed.

Probably best to remove this definition and the EXTRA_SPECS entry.

> +#endif
> […]
> +/* The number of consecutive floating-point registers needed to store the
> +   largest format supported by the FPU.  */
> +#define MAX_FPRS_PER_FMT 1
> +
> +/* The number of consecutive floating-point registers needed to store the
> +   smallest format supported by the FPU.  */
> +#define MIN_FPRS_PER_FMT 1

It might be clearer to drop this abstraction for Loongson, given that
it doesn't have the MIPS restrictions around 32-bit FPR pairs.

> […]
> +#define CALLEE_SAVED_REG_NUMBER(REGNO) \
> +  ((REGNO) >= 22 && (REGNO) <= 31 ? (REGNO) -22 : -1)

Formatting nit: missing space between “-” and “22”.

> […]
> +enum reg_class
> +{
> +  NO_REGS,	  /* no registers in set  */
> +  SIBCALL_REGS,	  /* SIBCALL_REGS  */
> +  JIRL_REGS,	  /* JIRL_REGS  */

These two comments don't really say anything.  It would be worth
describing them briefly in words.  E.g.: I assume SIBCALL_REGS
is something like:

  /* registers that can hold an indirect sibcall address.  */

> +  GR_REGS,	  /* integer registers  */
> +  CSR_REGS,	  /* integer registers except for $r0 and $r1 for lcsr.  */

Normally it's better to list supersets after subsets, so I think these
two should be the other way around.  Swapping them would mean updating
the register class macros too, of course.

> +  FP_REGS,	  /* floating point registers  */
> +  FCC_REGS,	  /* status registers (fp status)  */
> +  FRAME_REGS,	  /* arg pointer and frame pointer  */
> +  ALL_REGS,	  /* all registers  */
> +  LIM_REG_CLASSES /* max value + 1  */
> +};
> […]
> +#define CONST_LOW_PART(VALUE) ((VALUE) -CONST_HIGH_PART (VALUE))

Formatting nit: missing space after “-”.

> […]
> +#define ELIMINABLE_REGS \
> +  { \
> +    {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
> +      {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
> +      {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
> +      {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \

Very minor, sorry, but: these last three lines look over-indented.

> +  }
> […]
> +/* This structure has to cope with two different argument allocation
> +   schemes.  Most LoongArch ABIs view the arguments as a structure, of which
> +   the first N words go in registers and the rest go on the stack.  If I
> +   < N, the Ith word might go in Ith integer argument register or in a
> +   floating-point register.  For these ABIs, we only need to remember
> +   the offset of the current argument into the structure.
> +
> +   So for the standard ABIs, the first N words are allocated to integer
> +   registers, and loongarch_function_arg decides on an argument-by-argument
> +   basis whether that argument should really go in an integer register,
> +   or in a floating-point one.  */

Does the comment apply to Loongson?  It seemed like there was only
really one allocation scheme (excepting soft-float vs. hard-float,
but I don't think the comment was referring to that).

> +
> +typedef struct loongarch_args
> +{
> +  /* Number of integer registers used so far, up to MAX_ARGS_IN_REGISTERS.  */
> +  unsigned int num_gprs;
> +
> +  /* Number of floating-point registers used so far, likewise.  */
> +  unsigned int num_fprs;
> +
> +} CUMULATIVE_ARGS;
> +
> […]
> +/* Output assembler code to FILE to increment profiler label # LABELNO
> +   for profiling a function entry.  */
> +
> +#define MCOUNT_NAME "_mcount"

This comment looks like a remnant from when profiling code was
injected as asm text.  I think it can be deleted now.

> […]
> +#define CASE_VECTOR_MODE Pmode
> +
> +/* Only use short offsets if their range will not overflow.  */
> +#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode

The comment doesn't seem to match the code.

> […]
> +struct GTY (()) machine_function
> +{
> +  /* The next floating-point condition-code register to allocate
> +     for 8CC targets, relative to FCC_REG_FIRST.  */
> +  unsigned int next_fcc;
> +
> +  /* The number of extra stack bytes taken up by register varargs.
> +     This area is allocated by the callee at the very top of the frame.  */
> +  int varargs_size;
> +
> +  /* The current frame information, calculated by loongarch_compute_frame_info.
> +   */
> +  struct loongarch_frame_info frame;
> +
> +  /* True if at least one of the formal parameters to a function must be
> +     written to the frame header (probably so its address can be taken).  */
> +  bool does_not_use_frame_header;
> +
> +  /* True if none of the functions that are called by this function need
> +     stack space allocated for their arguments.  */
> +  bool optimize_call_stack;
> +
> +  /* True if one of the functions calling this function may not allocate
> +     a frame header.  */
> +  bool callers_may_not_allocate_frame;

The last three fields seem to be unused.

> +};
> +#endif
> +
> +/* As on most targets, we want the .eh_frame section to be read-only where
> +   possible.  And as on most targets, this means two things:
> +
> +     (a) Non-locally-binding pointers must have an indirect encoding,
> +	 so that the addresses in the .eh_frame section itself become
> +	 locally-binding.
> +
> +     (b) A shared library's .eh_frame section must encode locally-binding
> +	 pointers in a relative (relocation-free) form.
> +
> +   However, LoongArch has traditionally not allowed directives like:
> +
> +	.long	x-.
> +
> +   in cases where "x" is in a different section, or is not defined in the
> +   same assembly file.  We are therefore unable to emit the PC-relative
> +   form required by (b) at assembly time.

Is this really true for LoongArch?  It should be possible to use a more
efficient definition on modern targets.

> +
> +   Fortunately, the linker is able to convert absolute addresses into
> +   PC-relative addresses on our behalf.  Unfortunately, only certain
> +   versions of the linker know how to do this for indirect pointers,
> +   and for personality data.  We must fall back on using writable
> +   .eh_frame sections for shared libraries if the linker does not
> +   support this feature.  */
> +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
> +  (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr)
> +
> +/* Several named LoongArch patterns depend on Pmode.  These patterns have the
> +   form <NAME>_si for Pmode == SImode and <NAME>_di for Pmode == DImode.
> +   Add the appropriate suffix to generator function NAME and invoke it
> +   with arguments ARGS.  */
> +#define PMODE_INSN(NAME, ARGS) \
> +  (Pmode == SImode ? NAME##_si ARGS : NAME##_di ARGS)

FWIW, the "@” .md notation mentioned above would be a more modern
way of doing this.

Thanks,
Richard
Lulu Cheng March 19, 2022, 9:35 a.m. UTC | #2
在 2022/3/8 上午2:17, Richard Sandiford 写道:
>> […]
>> +/* This definition replaces the formerly used 'm' constraint with a
>> +   different constraint letter in order to avoid changing semantics of
>> +   the 'm' constraint when accepting new address formats in
>> +   TARGET_LEGITIMATE_ADDRESS_P.  The constraint letter defined here
>> +   must not be used in insn definitions or inline assemblies.  */
>> +#define TARGET_MEM_CONSTRAINT 'w'
> Do you need to do this for a new port like Loongson?  It looks like
> TARGET_LEGITIMATE_ADDRESS_P accepts all valid forms of address,
> including reg+reg (good!), so shouldn't "m" accept them too?
>
> If there is already existing code that assumes that "m" is never indexed
> then the definition obviously makes sense.  But if you don't know of any
> such code then it would be better to make "m" accept all the things that
> TARGET_LEGITIMATE_ADDRESS_P does (by removing this definition).
>
Hi, Richard:

LoongArch supports memory modes as follows:

          mode

1. base  + index

2. base + imm12

3. base + imm16 (immediate 4-byte alignment)

4. base + 0

Because 'base + imm12' is most case, so we defined it to 'm'.


Thanks.
diff mbox series

Patch

diff --git a/gcc/config/host-linux.cc b/gcc/config/host-linux.cc
index ff4557d5f8d..817d3c0870c 100644
--- a/gcc/config/host-linux.cc
+++ b/gcc/config/host-linux.cc
@@ -98,6 +98,8 @@ 
 # define TRY_EMPTY_VM_SPACE	0x60000000
 #elif defined(__riscv) && defined (__LP64__)
 # define TRY_EMPTY_VM_SPACE	0x1000000000
+#elif defined(__loongarch__) && defined(__LP64__)
+# define TRY_EMPTY_VM_SPACE	0x8000000000
 #else
 # define TRY_EMPTY_VM_SPACE	0
 #endif
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
new file mode 100644
index 00000000000..39a06b87294
--- /dev/null
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -0,0 +1,240 @@ 
+/* Prototypes of target machine for GNU compiler.  LoongArch version.
+   Copyright (C) 2021-2022 Free Software Foundation, Inc.
+   Contributed by Loongson Ltd.
+   Based on MIPS target for GNU compiler.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_LOONGARCH_PROTOS_H
+#define GCC_LOONGARCH_PROTOS_H
+
+/* Describes how a symbol is used.
+
+   SYMBOL_CONTEXT_CALL
+       The symbol is used as the target of a call instruction.
+
+   SYMBOL_CONTEXT_LEA
+       The symbol is used in a load-address operation.
+
+   SYMBOL_CONTEXT_MEM
+       The symbol is used as the address in a MEM.  */
+enum loongarch_symbol_context {
+  SYMBOL_CONTEXT_CALL,
+  SYMBOL_CONTEXT_LEA,
+  SYMBOL_CONTEXT_MEM
+};
+
+/* Classifies a SYMBOL_REF, LABEL_REF or UNSPEC address.
+
+   SYMBOL_GOT_DISP
+       The symbol's value will be loaded directly from the GOT.
+
+   SYMBOL_TLS
+       A thread-local symbol.
+
+   SYMBOL_TLSGD
+   SYMBOL_TLSLDM
+       UNSPEC wrappers around SYMBOL_TLS, corresponding to the
+       thread-local storage relocation operators.
+   */
+enum loongarch_symbol_type {
+  SYMBOL_GOT_DISP,
+  SYMBOL_TLS,
+  SYMBOL_TLSGD,
+  SYMBOL_TLSLDM,
+};
+#define NUM_SYMBOL_TYPES (SYMBOL_TLSLDM + 1)
+
+/* Classifies a type of call.
+
+   LARCH_CALL_NORMAL
+	A normal call or call_value pattern.
+
+   LARCH_CALL_SIBCALL
+	A sibcall or sibcall_value pattern.
+
+   LARCH_CALL_EPILOGUE
+	A call inserted in the epilogue.  */
+enum loongarch_call_type {
+  LARCH_CALL_NORMAL,
+  LARCH_CALL_SIBCALL,
+  LARCH_CALL_EPILOGUE
+};
+
+/* Controls the conditions under which certain instructions are split.
+
+   SPLIT_IF_NECESSARY
+	Only perform splits that are necessary for correctness
+	(because no unsplit version exists).
+
+   SPLIT_FOR_SPEED
+	Perform splits that are necessary for correctness or
+	beneficial for code speed.
+
+   SPLIT_FOR_SIZE
+	Perform splits that are necessary for correctness or
+	beneficial for code size.  */
+enum loongarch_split_type {
+  SPLIT_IF_NECESSARY,
+  SPLIT_FOR_SPEED,
+  SPLIT_FOR_SIZE
+};
+
+extern const char *const loongarch_fp_conditions[16];
+
+/* Routines implemented in loongarch.c.  */
+extern rtx loongarch_emit_move (rtx, rtx);
+extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
+extern void loongarch_expand_prologue (void);
+extern void loongarch_expand_epilogue (bool);
+extern bool loongarch_can_use_return_insn (void);
+
+extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_context,
+					   enum loongarch_symbol_type *);
+extern int loongarch_regno_mode_ok_for_base_p (int, machine_mode, bool);
+extern int loongarch_address_insns (rtx, machine_mode, bool);
+extern int loongarch_const_insns (rtx);
+extern int loongarch_split_const_insns (rtx);
+extern int loongarch_split_128bit_const_insns (rtx);
+extern int loongarch_load_store_insns (rtx, rtx_insn *);
+extern int loongarch_idiv_insns (machine_mode);
+#ifdef RTX_CODE
+extern void loongarch_emit_binary (enum rtx_code, rtx, rtx, rtx);
+#endif
+extern bool loongarch_split_symbol (rtx, rtx, machine_mode, rtx *);
+extern rtx loongarch_unspec_address (rtx, enum loongarch_symbol_type);
+extern rtx loongarch_strip_unspec_address (rtx);
+extern void loongarch_move_integer (rtx, rtx, unsigned HOST_WIDE_INT);
+extern bool loongarch_legitimize_move (machine_mode, rtx, rtx);
+extern rtx loongarch_legitimize_call_address (rtx);
+
+extern rtx loongarch_subword (rtx, bool);
+extern bool loongarch_split_move_p (rtx, rtx, enum loongarch_split_type);
+extern void loongarch_split_move (rtx, rtx, enum loongarch_split_type, rtx);
+extern bool loongarch_split_move_insn_p (rtx, rtx, rtx);
+extern void loongarch_split_move_insn (rtx, rtx, rtx);
+extern const char *loongarch_output_move (rtx, rtx);
+extern bool loongarch_cfun_has_cprestore_slot_p (void);
+#ifdef RTX_CODE
+extern void loongarch_expand_scc (rtx *);
+extern void loongarch_expand_conditional_branch (rtx *);
+extern void loongarch_expand_conditional_move (rtx *);
+extern void loongarch_expand_conditional_trap (rtx);
+#endif
+extern void loongarch_set_return_address (rtx, rtx);
+extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
+extern bool loongarch_expand_block_move (rtx, rtx, rtx);
+
+extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT,
+						    HOST_WIDE_INT, bool);
+extern bool loongarch_expand_ins_as_unaligned_store (rtx, rtx, HOST_WIDE_INT,
+						     HOST_WIDE_INT);
+extern HOST_WIDE_INT loongarch_debugger_offset (rtx, HOST_WIDE_INT);
+
+extern void loongarch_output_external (FILE *, tree, const char *);
+extern void loongarch_output_ascii (FILE *, const char *, size_t);
+extern void loongarch_output_aligned_decl_common (FILE *, tree, const char *,
+						  unsigned HOST_WIDE_INT,
+						  unsigned int);
+extern void loongarch_declare_common_object (FILE *, const char *,
+					     const char *,
+					     unsigned HOST_WIDE_INT,
+					     unsigned int, bool);
+extern void loongarch_declare_object (FILE *, const char *, const char *,
+				      const char *, ...) ATTRIBUTE_PRINTF_4;
+extern void loongarch_declare_object_name (FILE *, const char *, tree);
+extern void loongarch_finish_declare_object (FILE *, tree, int, int);
+extern void loongarch_set_text_contents_type (FILE *, const char *,
+					      unsigned long, bool);
+
+extern bool loongarch_small_data_pattern_p (rtx);
+extern rtx loongarch_rewrite_small_data (rtx);
+extern rtx loongarch_return_addr (int, rtx);
+
+extern enum reg_class loongarch_secondary_reload_class (enum reg_class,
+							machine_mode,
+							rtx, bool);
+extern int loongarch_class_max_nregs (enum reg_class, machine_mode);
+
+extern machine_mode loongarch_hard_regno_caller_save_mode (unsigned int,
+							   unsigned int,
+							   machine_mode);
+extern int loongarch_adjust_insn_length (rtx_insn *, int);
+extern const char *loongarch_output_conditional_branch (rtx_insn *, rtx *,
+							const char *,
+							const char *);
+extern const char *loongarch_output_order_conditional_branch (rtx_insn *,
+							      rtx *,
+							      bool);
+extern const char *loongarch_output_equal_conditional_branch (rtx_insn *,
+							      rtx *,
+							      bool);
+extern const char *loongarch_output_division (const char *, rtx *);
+extern const char *loongarch_output_probe_stack_range (rtx, rtx, rtx);
+extern bool loongarch_hard_regno_rename_ok (unsigned int, unsigned int);
+extern int loongarch_dspalu_bypass_p (rtx, rtx);
+extern rtx loongarch_prefetch_cookie (rtx, rtx);
+
+extern bool loongarch_global_symbol_p (const_rtx);
+extern bool loongarch_global_symbol_noweak_p (const_rtx);
+extern bool loongarch_weak_symbol_p (const_rtx);
+extern bool loongarch_symbol_binds_local_p (const_rtx);
+
+extern const char *current_section_name (void);
+extern unsigned int current_section_flags (void);
+extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
+
+union loongarch_gen_fn_ptrs
+{
+  rtx (*fn_8) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+  rtx (*fn_7) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+  rtx (*fn_6) (rtx, rtx, rtx, rtx, rtx, rtx);
+  rtx (*fn_5) (rtx, rtx, rtx, rtx, rtx);
+  rtx (*fn_4) (rtx, rtx, rtx, rtx);
+};
+
+extern void loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs,
+					  rtx, rtx, rtx, rtx, rtx);
+
+extern bool loongarch_signed_immediate_p (unsigned HOST_WIDE_INT, int, int);
+extern bool loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT, int, int);
+extern bool loongarch_12bit_offset_address_p (rtx, machine_mode);
+extern bool loongarch_14bit_shifted_offset_address_p (rtx, machine_mode);
+extern rtx loongarch_expand_thread_pointer (rtx);
+
+extern bool loongarch_eh_uses (unsigned int);
+extern bool loongarch_epilogue_uses (unsigned int);
+extern bool loongarch_load_store_bonding_p (rtx *, machine_mode, bool);
+extern bool loongarch_split_symbol_type (enum loongarch_symbol_type);
+
+typedef rtx (*mulsidi3_gen_fn) (rtx, rtx, rtx);
+
+extern void loongarch_register_frame_header_opt (void);
+
+extern void loongarch_declare_function_name (FILE *, const char *, tree);
+
+/* Routines implemented in loongarch-c.c.  */
+void loongarch_cpu_cpp_builtins (cpp_reader *);
+
+extern void loongarch_init_builtins (void);
+extern void loongarch_atomic_assign_expand_fenv (tree *, tree *, tree *);
+extern tree loongarch_builtin_decl (unsigned int, bool);
+extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED,
+				     machine_mode, int);
+extern tree loongarch_build_builtin_va_list (void);
+
+#endif /* ! GCC_LOONGARCH_PROTOS_H */
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
new file mode 100644
index 00000000000..7067c2228bf
--- /dev/null
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -0,0 +1,72 @@ 
+/* Definitions for microarchitecture-related data structures.
+   Copyright (C) 2021-2022 Free Software Foundation, Inc.
+   Contributed by Loongson Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef LOONGARCH_TUNE_H
+#define LOONGARCH_TUNE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* RTX costs of various operations on the different architectures.  */
+struct loongarch_rtx_cost_data
+{
+  unsigned short fp_add;
+  unsigned short fp_mult_sf;
+  unsigned short fp_mult_df;
+  unsigned short fp_div_sf;
+  unsigned short fp_div_df;
+  unsigned short int_mult_si;
+  unsigned short int_mult_di;
+  unsigned short int_div_si;
+  unsigned short int_div_di;
+  unsigned short branch_cost;
+  unsigned short memory_latency;
+};
+
+/* Default RTX cost initializer.  */
+#define COSTS_N_INSNS(N) ((N) * 4)
+#define DEFAULT_COSTS				\
+    .fp_add		= COSTS_N_INSNS (1),	\
+    .fp_mult_sf		= COSTS_N_INSNS (2),	\
+    .fp_mult_df		= COSTS_N_INSNS (2),	\
+    .fp_div_sf		= COSTS_N_INSNS (4),	\
+    .fp_div_df		= COSTS_N_INSNS (4),	\
+    .int_mult_si	= COSTS_N_INSNS (1),	\
+    .int_mult_di	= COSTS_N_INSNS (1),	\
+    .int_div_si		= COSTS_N_INSNS (1),	\
+    .int_div_di		= COSTS_N_INSNS (1),	\
+    .branch_cost	= 2,			\
+    .memory_latency	= 4
+
+/* Costs to use when optimizing for size.  */
+extern const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size;
+
+/* Cache size record of known processor models.  */
+struct loongarch_cache {
+    int l1d_line_size;  /* bytes */
+    int l1d_size;       /* KiB */
+    int l2d_size;       /* kiB */
+};
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* LOONGARCH_TUNE_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
new file mode 100644
index 00000000000..7cc69e3f1d2
--- /dev/null
+++ b/gcc/config/loongarch/loongarch.cc
@@ -0,0 +1,6450 @@ 
+/* Subroutines used for LoongArch code generation.
+   Copyright (C) 2021-2022 Free Software Foundation, Inc.
+   Contributed by Loongson Ltd.
+   Based on MIPS and RISC-V target for GNU compiler.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "df.h"
+#include "tm_p.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "optabs.h"
+#include "regs.h"
+#include "emit-rtl.h"
+#include "recog.h"
+#include "cgraph.h"
+#include "diagnostic.h"
+#include "insn-attr.h"
+#include "output.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "varasm.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "explow.h"
+#include "expr.h"
+#include "libfuncs.h"
+#include "reload.h"
+#include "common/common-target.h"
+#include "langhooks.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "sched-int.h"
+#include "gimplify.h"
+#include "target-globals.h"
+#include "tree-pass.h"
+#include "context.h"
+#include "builtins.h"
+#include "rtl-iter.h"
+
+/* This file should be included last.  */
+#include "target-def.h"
+
+/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
+#define UNSPEC_ADDRESS_P(X)					\
+  (GET_CODE (X) == UNSPEC					\
+   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST			\
+   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)
+
+/* Extract the symbol or label from UNSPEC wrapper X.  */
+#define UNSPEC_ADDRESS(X) XVECEXP (X, 0, 0)
+
+/* Extract the symbol type from UNSPEC wrapper X.  */
+#define UNSPEC_ADDRESS_TYPE(X) \
+  ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
+
+/* True if INSN is a loongarch.md pattern or asm statement.  */
+/* ???	This test exists through the compiler, perhaps it should be
+   moved to rtl.h.  */
+#define USEFUL_INSN_P(INSN)						\
+  (NONDEBUG_INSN_P (INSN)						\
+   && GET_CODE (PATTERN (INSN)) != USE					\
+   && GET_CODE (PATTERN (INSN)) != CLOBBER)
+
+/* True if bit BIT is set in VALUE.  */
+#define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0)
+
+/* Classifies an address.
+
+   ADDRESS_REG
+       A natural register + offset address.  The register satisfies
+       loongarch_valid_base_register_p and the offset is a const_arith_operand.
+
+   ADDRESS_REG_REG
+       A base register indexed by (optionally scaled) register.
+
+   ADDRESS_CONST_INT
+       A signed 16-bit constant address.
+
+   ADDRESS_SYMBOLIC:
+       A constant symbolic address.  */
+enum loongarch_address_type
+{
+  ADDRESS_REG,
+  ADDRESS_REG_REG,
+  ADDRESS_CONST_INT,
+  ADDRESS_SYMBOLIC
+};
+
+
+/* Information about an address described by loongarch_address_type.
+
+   ADDRESS_CONST_INT
+       No fields are used.
+
+   ADDRESS_REG
+       REG is the base register and OFFSET is the constant offset.
+
+   ADDRESS_SYMBOLIC
+       SYMBOL_TYPE is the type of symbol that the address references.  */
+struct loongarch_address_info
+{
+  enum loongarch_address_type type;
+  rtx reg;
+  rtx offset;
+  enum loongarch_symbol_type symbol_type;
+};
+
+/* Method of loading instant numbers:
+
+   METHOD_NORMAL:
+     Load 0-31 bit of the immediate number.
+
+   METHOD_LU32I:
+     Load 32-51 bit of the immediate number.
+
+   METHOD_LU52I:
+     Load 52-63 bit of the immediate number.
+
+   METHOD_INSV:
+     immediate like 0xfff00000fffffxxx
+   */
+enum loongarch_load_imm_method
+{
+  METHOD_NORMAL,
+  METHOD_LU32I,
+  METHOD_LU52I,
+  METHOD_INSV
+};
+
+struct loongarch_integer_op
+{
+  enum rtx_code code;
+  unsigned HOST_WIDE_INT value;
+  enum loongarch_load_imm_method method;
+};
+
+/* The largest number of operations needed to load an integer constant.
+   The worst accepted case for 64-bit constants is LU12I.W,LU32I.D,LU52I.D,ORI
+   or LU12I.W,LU32I.D,LU52I.D,ADDI.D DECL_ASSEMBLER_NAME.  */
+#define LARCH_MAX_INTEGER_OPS 4
+
+/* Arrays that map GCC register numbers to debugger register numbers.  */
+int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER];
+
+/* Index [M][R] is true if register R is allowed to hold a value of mode M.  */
+static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE]
+					  [FIRST_PSEUDO_REGISTER];
+
+/* Index C is true if character C is a valid PRINT_OPERAND punctation
+   character.  */
+static bool loongarch_print_operand_punct[256];
+
+/* Cached value of can_issue_more.  This is cached in loongarch_variable_issue
+   hook and returned from loongarch_sched_reorder2.  */
+static int cached_can_issue_more;
+
+/* Index R is the smallest register class that contains register R.  */
+const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = {
+    GR_REGS,	     GR_REGS,	      GR_REGS,	       GR_REGS,
+    JIRL_REGS,       JIRL_REGS,       JIRL_REGS,       JIRL_REGS,
+    JIRL_REGS,       JIRL_REGS,       JIRL_REGS,       JIRL_REGS,
+    SIBCALL_REGS,    SIBCALL_REGS,    SIBCALL_REGS,    SIBCALL_REGS,
+    SIBCALL_REGS,    SIBCALL_REGS,    SIBCALL_REGS,    SIBCALL_REGS,
+    SIBCALL_REGS,    GR_REGS,	      GR_REGS,	       JIRL_REGS,
+    JIRL_REGS,       JIRL_REGS,       JIRL_REGS,       JIRL_REGS,
+    JIRL_REGS,       JIRL_REGS,       JIRL_REGS,       JIRL_REGS,
+
+    FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
+    FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
+    FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
+    FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
+    FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
+    FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
+    FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
+    FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
+    FCC_REGS,	FCC_REGS,	FCC_REGS,	FCC_REGS,
+    FCC_REGS,	FCC_REGS,	FCC_REGS,	FCC_REGS,
+    FRAME_REGS,	FRAME_REGS
+};
+
+/* The value of TARGET_ATTRIBUTE_TABLE.  */
+static const struct attribute_spec loongarch_attribute_table[] = {
+    /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+       affects_type_identity, handler, exclude }  */
+      { NULL, 0, 0, false, false, false, false, NULL, NULL }
+};
+
+/* Which cost information to use.  */
+static const struct loongarch_rtx_cost_data *loongarch_cost;
+
+/* Information about a single argument.  */
+struct loongarch_arg_info
+{
+  /* True if the argument is at least partially passed on the stack.  */
+  bool stack_p;
+
+  /* The number of integer registers allocated to this argument.  */
+  unsigned int num_gprs;
+
+  /* The offset of the first register used, provided num_gprs is nonzero.
+     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
+  unsigned int gpr_offset;
+
+  /* The number of floating-point registers allocated to this argument.  */
+  unsigned int num_fprs;
+
+  /* The offset of the first register used, provided num_fprs is nonzero.  */
+  unsigned int fpr_offset;
+};
+
+/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Every parameter gets at
+   least PARM_BOUNDARY bits of alignment, but will be given anything up
+   to PREFERRED_STACK_BOUNDARY bits if the type requires it.  */
+
+static unsigned int
+loongarch_function_arg_boundary (machine_mode mode, const_tree type)
+{
+  unsigned int alignment;
+
+  /* Use natural alignment if the type is not aggregate data.  */
+  if (type && !AGGREGATE_TYPE_P (type))
+    alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
+  else
+    alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
+
+  return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
+}
+
+/* If MODE represents an argument that can be passed or returned in
+   floating-point registers, return the number of registers, else 0.  */
+
+static unsigned
+loongarch_pass_mode_in_fpr_p (machine_mode mode)
+{
+  if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
+    {
+      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+	return 1;
+
+      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
+	return 2;
+    }
+
+  return 0;
+}
+
+typedef struct
+{
+  const_tree type;
+  HOST_WIDE_INT offset;
+} loongarch_aggregate_field;
+
+/* Identify subfields of aggregates that are candidates for passing in
+   floating-point registers.  */
+
+static int
+loongarch_flatten_aggregate_field (const_tree type,
+				   loongarch_aggregate_field fields[2], int n,
+				   HOST_WIDE_INT offset)
+{
+  switch (TREE_CODE (type))
+    {
+    case RECORD_TYPE:
+      /* Can't handle incomplete types nor sizes that are not fixed.  */
+      if (!COMPLETE_TYPE_P (type)
+	  || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+	  || !tree_fits_uhwi_p (TYPE_SIZE (type)))
+	return -1;
+
+      for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+	if (TREE_CODE (f) == FIELD_DECL)
+	  {
+	    if (!TYPE_P (TREE_TYPE (f)))
+	      return -1;
+
+	    HOST_WIDE_INT pos = offset + int_byte_position (f);
+	    n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n,
+						   pos);
+	    if (n < 0)
+	      return -1;
+	  }
+      return n;
+
+    case ARRAY_TYPE:
+      {
+	HOST_WIDE_INT n_elts;
+	loongarch_aggregate_field subfields[2];
+	tree index = TYPE_DOMAIN (type);
+	tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
+	int n_subfields = loongarch_flatten_aggregate_field (TREE_TYPE (type),
+							     subfields, 0,
+							     offset);
+
+	/* Can't handle incomplete types nor sizes that are not fixed.  */
+	if (n_subfields <= 0
+	    || !COMPLETE_TYPE_P (type)
+	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+	    || !index
+	    || !TYPE_MAX_VALUE (index)
+	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
+	    || !TYPE_MIN_VALUE (index)
+	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
+	    || !tree_fits_uhwi_p (elt_size))
+	  return -1;
+
+	n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
+		 - tree_to_uhwi (TYPE_MIN_VALUE (index));
+	gcc_assert (n_elts >= 0);
+
+	for (HOST_WIDE_INT i = 0; i < n_elts; i++)
+	  for (int j = 0; j < n_subfields; j++)
+	    {
+	      if (n >= 2)
+		return -1;
+
+	      fields[n] = subfields[j];
+	      fields[n++].offset += i * tree_to_uhwi (elt_size);
+	    }
+
+	return n;
+      }
+
+    case COMPLEX_TYPE:
+      {
+	/* Complex type need consume 2 field, so n must be 0.  */
+	if (n != 0)
+	  return -1;
+
+	HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)));
+
+	if (elt_size <= UNITS_PER_FP_ARG)
+	  {
+	    fields[0].type = TREE_TYPE (type);
+	    fields[0].offset = offset;
+	    fields[1].type = TREE_TYPE (type);
+	    fields[1].offset = offset + elt_size;
+
+	    return 2;
+	  }
+
+	return -1;
+      }
+
+    default:
+      if (n < 2
+	  && ((SCALAR_FLOAT_TYPE_P (type)
+	       && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG)
+	      || (INTEGRAL_TYPE_P (type)
+		  && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)))
+	{
+	  fields[n].type = type;
+	  fields[n].offset = offset;
+	  return n + 1;
+	}
+      else
+	return -1;
+    }
+}
+
+/* Identify candidate aggregates for passing in floating-point registers.
+   Candidates have at most two fields after flattening.  */
+
+static int
+loongarch_flatten_aggregate_argument (const_tree type,
+				      loongarch_aggregate_field fields[2])
+{
+  if (!type || TREE_CODE (type) != RECORD_TYPE)
+    return -1;
+
+  return loongarch_flatten_aggregate_field (type, fields, 0, 0);
+}
+
+/* See whether TYPE is a record whose fields should be returned in one or
+   two floating-point registers.  If so, populate FIELDS accordingly.  */
+
+static unsigned
+loongarch_pass_aggregate_in_fpr_pair_p (const_tree type,
+					loongarch_aggregate_field fields[2])
+{
+  int n = loongarch_flatten_aggregate_argument (type, fields);
+
+  for (int i = 0; i < n; i++)
+    if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
+      return 0;
+
+  return n > 0 ? n : 0;
+}
+
+/* See whether TYPE is a record whose fields should be returned in one or
+   floating-point register and one integer register.  If so, populate
+   FIELDS accordingly.  */
+
+static bool
+loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
+					   loongarch_aggregate_field fields[2])
+{
+  unsigned num_int = 0, num_float = 0;
+  int n = loongarch_flatten_aggregate_argument (type, fields);
+
+  for (int i = 0; i < n; i++)
+    {
+      num_float += SCALAR_FLOAT_TYPE_P (fields[i].type);
+      num_int += INTEGRAL_TYPE_P (fields[i].type);
+    }
+
+  return num_int == 1 && num_float == 1;
+}
+
+/* Return the representation of an argument passed or returned in an FPR
+   when the value has mode VALUE_MODE and the type has TYPE_MODE.  The
+   two modes may be different for structures like:
+
+   struct __attribute__((packed)) foo { float f; }
+
+   where the SFmode value "f" is passed in REGNO but the struct itself
+   has mode BLKmode.  */
+
+static rtx
+loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno,
+			   machine_mode value_mode)
+{
+  rtx x = gen_rtx_REG (value_mode, regno);
+
+  if (type_mode != value_mode)
+    {
+      x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx);
+      x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
+    }
+  return x;
+}
+
+/* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
+   MODE is the mode of the composite.  MODE1 and OFFSET1 are the mode and
+   byte offset for the first value, likewise MODE2 and OFFSET2 for the
+   second value.  */
+
+static rtx
+loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1,
+			 machine_mode mode1, HOST_WIDE_INT offset1,
+			 unsigned regno2, machine_mode mode2,
+			 HOST_WIDE_INT offset2)
+{
+  return gen_rtx_PARALLEL (
+    mode, gen_rtvec (2,
+		     gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode1, regno1),
+					GEN_INT (offset1)),
+		     gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode2, regno2),
+					GEN_INT (offset2))));
+}
+
+/* Fill INFO with information about a single argument, and return an
+   RTL pattern to pass or return the argument.  CUM is the cumulative
+   state for earlier arguments.  MODE is the mode of this argument and
+   TYPE is its type (if known).  NAMED is true if this is a named
+   (fixed) argument rather than a variable one.  RETURN_P is true if
+   returning the argument, or false if passing the argument.  */
+
+static rtx
+loongarch_get_arg_info (struct loongarch_arg_info *info,
+			const CUMULATIVE_ARGS *cum, machine_mode mode,
+			const_tree type, bool named, bool return_p)
+{
+  unsigned num_bytes, num_words;
+  unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
+  unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
+  unsigned alignment = loongarch_function_arg_boundary (mode, type);
+
+  memset (info, 0, sizeof (*info));
+  info->gpr_offset = cum->num_gprs;
+  info->fpr_offset = cum->num_fprs;
+
+  if (named)
+    {
+      loongarch_aggregate_field fields[2];
+      unsigned fregno = fpr_base + info->fpr_offset;
+      unsigned gregno = gpr_base + info->gpr_offset;
+
+      /* Pass one- or two-element floating-point aggregates in FPRs.  */
+      if ((info->num_fprs
+	   = loongarch_pass_aggregate_in_fpr_pair_p (type, fields))
+	  && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
+	switch (info->num_fprs)
+	  {
+	  case 1:
+	    return loongarch_pass_fpr_single (mode, fregno,
+					      TYPE_MODE (fields[0].type));
+
+	  case 2:
+	    return loongarch_pass_fpr_pair (mode, fregno,
+					    TYPE_MODE (fields[0].type),
+					    fields[0].offset,
+					    fregno + 1,
+					    TYPE_MODE (fields[1].type),
+					    fields[1].offset);
+
+	  default:
+	    gcc_unreachable ();
+	  }
+
+      /* Pass real and complex floating-point numbers in FPRs.  */
+      if ((info->num_fprs = loongarch_pass_mode_in_fpr_p (mode))
+	  && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
+	switch (GET_MODE_CLASS (mode))
+	  {
+	  case MODE_FLOAT:
+	    return gen_rtx_REG (mode, fregno);
+
+	  case MODE_COMPLEX_FLOAT:
+	    return loongarch_pass_fpr_pair (mode, fregno,
+					    GET_MODE_INNER (mode), 0,
+					    fregno + 1, GET_MODE_INNER (mode),
+					    GET_MODE_UNIT_SIZE (mode));
+
+	  default:
+	    gcc_unreachable ();
+	  }
+
+      /* Pass structs with one float and one integer in an FPR and a GPR.  */
+      if (loongarch_pass_aggregate_in_fpr_and_gpr_p (type, fields)
+	  && info->gpr_offset < MAX_ARGS_IN_REGISTERS
+	  && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
+	{
+	  info->num_gprs = 1;
+	  info->num_fprs = 1;
+
+	  if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
+	    std::swap (fregno, gregno);
+
+	  return loongarch_pass_fpr_pair (mode, fregno,
+					  TYPE_MODE (fields[0].type),
+					  fields[0].offset, gregno,
+					  TYPE_MODE (fields[1].type),
+					  fields[1].offset);
+	}
+    }
+
+  /* Work out the size of the argument.  */
+  num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
+  num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+  /* Doubleword-aligned varargs start on an even register boundary.  */
+  if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
+    info->gpr_offset += info->gpr_offset & 1;
+
+  /* Partition the argument between registers and stack.  */
+  info->num_fprs = 0;
+  info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
+  info->stack_p = (num_words - info->num_gprs) != 0;
+
+  if (info->num_gprs || return_p)
+    return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
+
+  return NULL_RTX;
+}
+
+/* Implement TARGET_FUNCTION_ARG.  */
+
+static rtx
+loongarch_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
+{
+  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+  struct loongarch_arg_info info;
+
+  if (arg.end_marker_p ())
+    return NULL;
+
+  return loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named,
+				 false);
+}
+
+/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */
+
+static void
+loongarch_function_arg_advance (cumulative_args_t cum_v,
+				const function_arg_info &arg)
+{
+  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+  struct loongarch_arg_info info;
+
+  loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
+
+  /* Advance the register count.  This has the effect of setting
+     num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
+     argument required us to skip the final GPR and pass the whole
+     argument on the stack.  */
+  cum->num_fprs = info.fpr_offset + info.num_fprs;
+  cum->num_gprs = info.gpr_offset + info.num_gprs;
+}
+
+/* Implement TARGET_ARG_PARTIAL_BYTES.  */
+
+static int
+loongarch_arg_partial_bytes (cumulative_args_t cum,
+			     const function_arg_info &generic_arg)
+{
+  struct loongarch_arg_info arg;
+
+  loongarch_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
+			  generic_arg.type, generic_arg.named, false);
+  return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
+}
+
+/* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
+   VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
+   VALTYPE is null and MODE is the mode of the return value.  */
+
+static rtx
+loongarch_function_value_1 (const_tree type, const_tree func,
+			    machine_mode mode)
+{
+  struct loongarch_arg_info info;
+  CUMULATIVE_ARGS args;
+
+  if (type)
+    {
+      int unsigned_p = TYPE_UNSIGNED (type);
+
+      mode = TYPE_MODE (type);
+
+      /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes,
+	 return values, promote the mode here too.  */
+      mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
+    }
+
+  memset (&args, 0, sizeof (args));
+  return loongarch_get_arg_info (&info, &args, mode, type, true, true);
+}
+
+
+/* Implement TARGET_FUNCTION_VALUE.  */
+
+static rtx
+loongarch_function_value (const_tree valtype, const_tree fn_decl_or_type,
+			  bool outgoing ATTRIBUTE_UNUSED)
+{
+  return loongarch_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
+}
+
+/* Implement TARGET_LIBCALL_VALUE.  */
+
+static rtx
+loongarch_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+  return loongarch_function_value_1 (NULL_TREE, NULL_TREE, mode);
+}
+
+
+/* Implement TARGET_PASS_BY_REFERENCE.  */
+
+static bool
+loongarch_pass_by_reference (cumulative_args_t cum_v,
+			     const function_arg_info &arg)
+{
+  HOST_WIDE_INT size = arg.type_size_in_bytes ();
+  struct loongarch_arg_info info;
+  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+
+  /* ??? std_gimplify_va_arg_expr passes NULL for cum.  Fortunately, we
+     never pass variadic arguments in floating-point registers, so we can
+     avoid the call to loongarch_get_arg_info in this case.  */
+  if (cum != NULL)
+    {
+      /* Don't pass by reference if we can use a floating-point register.  */
+      loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named,
+			      false);
+      if (info.num_fprs)
+	return false;
+    }
+
+  /* Pass by reference if the data do not fit in two integer registers.  */
+  return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
+}
+
+/* Implement TARGET_RETURN_IN_MEMORY.  */
+
+static bool
+loongarch_return_in_memory (const_tree type,
+			    const_tree fndecl ATTRIBUTE_UNUSED)
+{
+  CUMULATIVE_ARGS args;
+  cumulative_args_t cum = pack_cumulative_args (&args);
+
+  /* The rules for returning in memory are the same as for passing the
+     first named argument by reference.  */
+  memset (&args, 0, sizeof (args));
+  function_arg_info arg (const_cast<tree> (type), /*named=*/true);
+  return loongarch_pass_by_reference (cum, arg);
+}
+
+/* Implement TARGET_SETUP_INCOMING_VARARGS.  */
+
+static void
+loongarch_setup_incoming_varargs (cumulative_args_t cum,
+				  const function_arg_info &arg,
+				  int *pretend_size ATTRIBUTE_UNUSED,
+				  int no_rtl)
+{
+  CUMULATIVE_ARGS local_cum;
+  int gp_saved;
+
+  /* The caller has advanced CUM up to, but not beyond, the last named
+     argument.  Advance a local copy of CUM past the last "real" named
+     argument, to find out how many registers are left over.  */
+  local_cum = *get_cumulative_args (cum);
+  loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
+
+  /* Found out how many registers we need to save.  */
+  gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
+
+  if (!no_rtl && gp_saved > 0)
+    {
+      rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
+			       REG_PARM_STACK_SPACE (cfun->decl)
+				 - gp_saved * UNITS_PER_WORD);
+      rtx mem = gen_frame_mem (BLKmode, ptr);
+      set_mem_alias_set (mem, get_varargs_alias_set ());
+
+      move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, mem, gp_saved);
+    }
+  if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
+    cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
+}
+
+/* Make the last instruction frame-related and note that it performs
+   the operation described by FRAME_PATTERN.  */
+
+static void
+loongarch_set_frame_expr (rtx frame_pattern)
+{
+  rtx insn;
+
+  insn = get_last_insn ();
+  RTX_FRAME_RELATED_P (insn) = 1;
+  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, frame_pattern,
+				      REG_NOTES (insn));
+}
+
+/* Return a frame-related rtx that stores REG at MEM.
+   REG must be a single register.  */
+
+static rtx
+loongarch_frame_set (rtx mem, rtx reg)
+{
+  rtx set = gen_rtx_SET (mem, reg);
+  RTX_FRAME_RELATED_P (set) = 1;
+  return set;
+}
+
+/* Return true if the current function must save register REGNO.  */
+
+static bool
+loongarch_save_reg_p (unsigned int regno)
+{
+  bool call_saved = !global_regs[regno] && !call_used_regs[regno];
+  bool might_clobber
+    = crtl->saves_all_registers || df_regs_ever_live_p (regno);
+
+  if (call_saved && might_clobber)
+    return true;
+
+  if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
+    return true;
+
+  if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
+    return true;
+
+  return false;
+}
+
+/* Determine which GPR save/restore routine to call.  */
+
+static unsigned
+loongarch_save_libcall_count (unsigned mask)
+{
+  for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
+    if (BITSET_P (mask, n))
+      return CALLEE_SAVED_REG_NUMBER (n) + 1;
+  abort ();
+}
+
+/* Populate the current function's loongarch_frame_info structure.
+
+   LoongArch stack frames grown downward.  High addresses are at the top.
+
+     +-------------------------------+
+     |				     |
+     |  incoming stack arguments     |
+     |				     |
+     +-------------------------------+ <-- incoming stack pointer
+     |				     |
+     |  callee-allocated save area   |
+     |  for arguments that are       |
+     |  split between registers and  |
+     |  the stack		     |
+     |				     |
+     +-------------------------------+ <-- arg_pointer_rtx (virtual)
+     |				     |
+     |  callee-allocated save area   |
+     |  for register varargs	     |
+     |				     |
+     +-------------------------------+ <-- hard_frame_pointer_rtx;
+     |				     |     stack_pointer_rtx + gp_sp_offset
+     |  GPR save area		     |       + UNITS_PER_WORD
+     |				     |
+     +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
+     |				     |       + UNITS_PER_HWVALUE
+     |  FPR save area		     |
+     |				     |
+     +-------------------------------+ <-- frame_pointer_rtx (virtual)
+     |				     |
+     |  local variables		     |
+     |				     |
+   P +-------------------------------+
+     |				     |
+     |  outgoing stack arguments     |
+     |				     |
+     +-------------------------------+ <-- stack_pointer_rtx
+
+   Dynamic stack allocations such as alloca insert data at point P.
+   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
+   hard_frame_pointer_rtx unchanged.  */
+
+static void
+loongarch_compute_frame_info (void)
+{
+  struct loongarch_frame_info *frame;
+  HOST_WIDE_INT offset;
+  unsigned int regno, i, num_x_saved = 0, num_f_saved = 0;
+
+  frame = &cfun->machine->frame;
+  memset (frame, 0, sizeof (*frame));
+
+  /* Find out which GPRs we need to save.  */
+  for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (loongarch_save_reg_p (regno))
+      frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
+
+  /* If this function calls eh_return, we must also save and restore the
+     EH data registers.  */
+  if (crtl->calls_eh_return)
+    for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
+      frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
+
+  /* Find out which FPRs we need to save.  This loop must iterate over
+     the same space as its companion in loongarch_for_each_saved_reg.  */
+  if (TARGET_HARD_FLOAT)
+    for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+      if (loongarch_save_reg_p (regno))
+	frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
+
+  /* At the bottom of the frame are any outgoing stack arguments.  */
+  offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size);
+  /* Next are local stack variables.  */
+  offset += LARCH_STACK_ALIGN (get_frame_size ());
+  /* The virtual frame pointer points above the local variables.  */
+  frame->frame_pointer_offset = offset;
+  /* Next are the callee-saved FPRs.  */
+  if (frame->fmask)
+    offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
+  frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
+  /* Next are the callee-saved GPRs.  */
+  if (frame->mask)
+    {
+      unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD);
+      unsigned num_save_restore
+	= 1 + loongarch_save_libcall_count (frame->mask);
+
+      /* Only use save/restore routines if they don't alter the stack size.  */
+      if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size)
+	frame->save_libcall_adjustment = x_save_size;
+
+      offset += x_save_size;
+    }
+  frame->gp_sp_offset = offset - UNITS_PER_WORD;
+  /* The hard frame pointer points above the callee-saved GPRs.  */
+  frame->hard_frame_pointer_offset = offset;
+  /* Above the hard frame pointer is the callee-allocated varags save area.  */
+  offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size);
+  /* Next is the callee-allocated area for pretend stack arguments.  */
+  offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size);
+  /* Arg pointer must be below pretend args, but must be above alignment
+     padding.  */
+  frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
+  frame->total_size = offset;
+  /* Next points the incoming stack pointer and any incoming arguments.  */
+
+  /* Only use save/restore routines when the GPRs are atop the frame.  */
+  if (frame->hard_frame_pointer_offset != frame->total_size)
+    frame->save_libcall_adjustment = 0;
+}
+
+/* Implement INITIAL_ELIMINATION_OFFSET.  FROM is either the frame pointer
+   or argument pointer.  TO is either the stack pointer or hard frame
+   pointer.  */
+
+HOST_WIDE_INT
+loongarch_initial_elimination_offset (int from, int to)
+{
+  HOST_WIDE_INT src, dest;
+
+  loongarch_compute_frame_info ();
+
+  if (to == HARD_FRAME_POINTER_REGNUM)
+    dest = cfun->machine->frame.hard_frame_pointer_offset;
+  else if (to == STACK_POINTER_REGNUM)
+    dest = 0; /* The stack pointer is the base of all offsets, hence 0.  */
+  else
+    gcc_unreachable ();
+
+  if (from == FRAME_POINTER_REGNUM)
+    src = cfun->machine->frame.frame_pointer_offset;
+  else if (from == ARG_POINTER_REGNUM)
+    src = cfun->machine->frame.arg_pointer_offset;
+  else
+    gcc_unreachable ();
+
+  return src - dest;
+}
+
+/* A function to save or store a register.  The first argument is the
+   register and the second is the stack slot.  */
+typedef void (*loongarch_save_restore_fn) (rtx, rtx);
+
+/* Use FN to save or restore register REGNO.  MODE is the register's
+   mode and OFFSET is the offset of its save slot from the current
+   stack pointer.  */
+
+static void
+loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset,
+			    loongarch_save_restore_fn fn)
+{
+  rtx mem;
+
+  mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
+  fn (gen_rtx_REG (mode, regno), mem);
+}
+
+/* Call FN for each register that is saved by the current function.
+   SP_OFFSET is the offset of the current stack pointer from the start
+   of the frame.  */
+
+static void
+loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
+			      loongarch_save_restore_fn fn)
+{
+  HOST_WIDE_INT offset;
+
+  /* Save the link register and s-registers.  */
+  offset = cfun->machine->frame.gp_sp_offset - sp_offset;
+  for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+      {
+	loongarch_save_restore_reg (word_mode, regno, offset, fn);
+	offset -= UNITS_PER_WORD;
+      }
+
+  /* This loop must iterate over the same space as its companion in
+     loongarch_compute_frame_info.  */
+  offset = cfun->machine->frame.fp_sp_offset - sp_offset;
+  for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+      {
+	machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
+	loongarch_save_restore_reg (mode, regno, offset, fn);
+	offset -= GET_MODE_SIZE (mode);
+      }
+}
+
+/* Emit a move from SRC to DEST.  Assume that the move expanders can
+   handle all moves if !can_create_pseudo_p ().  The distinction is
+   important because, unlike emit_move_insn, the move expanders know
+   how to force Pmode objects into the constant pool even when the
+   constant pool address is not itself legitimate.  */
+
+rtx
+loongarch_emit_move (rtx dest, rtx src)
+{
+  return (can_create_pseudo_p () ? emit_move_insn (dest, src)
+				 : emit_move_insn_1 (dest, src));
+}
+
+/* Save register REG to MEM.  Make the instruction frame-related.  */
+
+static void
+loongarch_save_reg (rtx reg, rtx mem)
+{
+  loongarch_emit_move (mem, reg);
+  loongarch_set_frame_expr (loongarch_frame_set (mem, reg));
+}
+
+/* Restore register REG from MEM.  */
+
+static void
+loongarch_restore_reg (rtx reg, rtx mem)
+{
+  rtx insn = loongarch_emit_move (reg, mem);
+  rtx dwarf = NULL_RTX;
+  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+  REG_NOTES (insn) = dwarf;
+
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+/* For stack frames that can't be allocated with a single ADDI instruction,
+   compute the best value to initially allocate.  It must at a minimum
+   allocate enough space to spill the callee-saved registers.  */
+
+static HOST_WIDE_INT
+loongarch_first_stack_step (struct loongarch_frame_info *frame)
+{
+  if (IMM12_OPERAND (frame->total_size))
+    return frame->total_size;
+
+  HOST_WIDE_INT min_first_step
+    = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
+  HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
+  HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
+  gcc_assert (min_first_step <= max_first_step);
+
+  /* As an optimization, use the least-significant bits of the total frame
+     size, so that the second adjustment step is just LU12I + ADD.  */
+  if (!IMM12_OPERAND (min_second_step)
+      && frame->total_size % IMM_REACH < IMM_REACH / 2
+      && frame->total_size % IMM_REACH >= min_first_step)
+    return frame->total_size % IMM_REACH;
+
+  return max_first_step;
+}
+
+static void
+loongarch_emit_stack_tie (void)
+{
+  if (Pmode == SImode)
+    emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
+  else
+    emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
+}
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+#if PROBE_INTERVAL > 16384
+#error Cannot use indexed addressing mode for stack probing
+#endif
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+   inclusive.  These are offsets from the current stack pointer.  */
+
+static void
+loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+  /* See if we have a constant small number of probes to generate.  If so,
+     that's the easy case.  */
+  if ((TARGET_64BIT && (first + size <= 32768))
+      || (!TARGET_64BIT && (first + size <= 2048)))
+    {
+      HOST_WIDE_INT i;
+
+      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
+	 it exceeds SIZE.  If only one probe is needed, this will not
+	 generate any code.  Then probe at FIRST + SIZE.  */
+      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+					 -(first + i)));
+
+      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+				       -(first + size)));
+    }
+
+  /* Otherwise, do the same as above, but in a loop.  Note that we must be
+     extra careful with variables wrapping around because we might be at
+     the very top (or the very bottom) of the address space and we have
+     to be able to handle this case properly; in particular, we use an
+     equality test for the loop condition.  */
+  else
+    {
+      HOST_WIDE_INT rounded_size;
+      rtx r13 = LARCH_PROLOGUE_TEMP (Pmode);
+      rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
+      rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
+
+      /* Sanity check for the addressing mode we're going to use.  */
+      gcc_assert (first <= 16384);
+
+
+      /* Step 1: round SIZE to the previous multiple of the interval.  */
+
+      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
+
+      /* TEST_ADDR = SP + FIRST */
+      if (first != 0)
+	{
+	  emit_move_insn (r14, GEN_INT (first));
+	  emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode,
+						      stack_pointer_rtx,
+						      r14)));
+	}
+      else
+	emit_move_insn (r13, stack_pointer_rtx);
+
+      /* Step 2: compute initial and final value of the loop counter.  */
+
+      emit_move_insn (r14, GEN_INT (PROBE_INTERVAL));
+      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
+      if (rounded_size == 0)
+	emit_move_insn (r12, r13);
+      else
+	{
+	  emit_move_insn (r12, GEN_INT (rounded_size));
+	  emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12)));
+	  /* Step 3: the loop
+
+	     do
+	     {
+	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+	     probe at TEST_ADDR
+	     }
+	     while (TEST_ADDR != LAST_ADDR)
+
+	     probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+	     until it is equal to ROUNDED_SIZE.  */
+
+	  emit_insn (PMODE_INSN (gen_probe_stack_range, (r13, r13, r12, r14)));
+	}
+
+      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+	 that SIZE is equal to ROUNDED_SIZE.  */
+
+      if (size != rounded_size)
+	{
+	  if (TARGET_64BIT)
+	    emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
+	  else
+	    {
+	      HOST_WIDE_INT i;
+	      for (i = 2048; i < (size - rounded_size); i += 2048)
+		{
+		  emit_stack_probe (plus_constant (Pmode, r12, -i));
+		  emit_insn (gen_rtx_SET (r12,
+					  plus_constant (Pmode, r12, -2048)));
+		}
+	      rtx r1 = plus_constant (Pmode, r12,
+				      -(size - rounded_size - i + 2048));
+	      emit_stack_probe (r1);
+	    }
+	}
+    }
+
+  /* Make sure nothing is scheduled before we are done.  */
+  emit_insn (gen_blockage ());
+}
+
+/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
+   absolute addresses.  */
+const char *
+loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
+{
+  static int labelno = 0;
+  char loop_lab[32], tmp[64];
+  rtx xops[3];
+
+  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
+
+  /* Loop.  */
+  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
+  xops[0] = reg1;
+  xops[1] = GEN_INT (-PROBE_INTERVAL);
+  xops[2] = reg3;
+  if (TARGET_64BIT)
+    output_asm_insn ("sub.d\t%0,%0,%2", xops);
+  else
+    output_asm_insn ("sub.w\t%0,%0,%2", xops);
+
+  /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch.  */
+  xops[1] = reg2;
+  strcpy (tmp, "bne\t%0,%1,");
+  if (TARGET_64BIT)
+    output_asm_insn ("st.d\t$r0,%0,0", xops);
+  else
+    output_asm_insn ("st.w\t$r0,%0,0", xops);
+  output_asm_insn (strcat (tmp, &loop_lab[1]), xops);
+
+  return "";
+}
+
+/* Expand the "prologue" pattern.  */
+
+void
+loongarch_expand_prologue (void)
+{
+  struct loongarch_frame_info *frame = &cfun->machine->frame;
+  HOST_WIDE_INT size = frame->total_size;
+  HOST_WIDE_INT tmp;
+  unsigned mask = frame->mask;
+  rtx insn;
+
+  if (flag_stack_usage_info)
+    current_function_static_stack_size = size;
+
+  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+      || flag_stack_clash_protection)
+    {
+      if (crtl->is_leaf && !cfun->calls_alloca)
+	{
+	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+	    {
+	      tmp = size - get_stack_check_protect ();
+	      loongarch_emit_probe_stack_range (get_stack_check_protect (),
+						tmp);
+	    }
+	}
+      else if (size > 0)
+	loongarch_emit_probe_stack_range (get_stack_check_protect (), size);
+    }
+
+  /* Save the registers.  */
+  if ((frame->mask | frame->fmask) != 0)
+    {
+      HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame));
+
+      insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+			    GEN_INT (-step1));
+      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+      size -= step1;
+      loongarch_for_each_saved_reg (size, loongarch_save_reg);
+    }
+
+  frame->mask = mask; /* Undo the above fib.  */
+
+  /* Set up the frame pointer, if we're using one.  */
+  if (frame_pointer_needed)
+    {
+      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
+			    GEN_INT (frame->hard_frame_pointer_offset - size));
+      RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+
+      loongarch_emit_stack_tie ();
+    }
+
+  /* Allocate the rest of the frame.  */
+  if (size > 0)
+    {
+      if (IMM12_OPERAND (-size))
+	{
+	  insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+				GEN_INT (-size));
+	  RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
+	}
+      else
+	{
+	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
+	  emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+				    LARCH_PROLOGUE_TEMP (Pmode)));
+
+	  /* Describe the effect of the previous instructions.  */
+	  insn = plus_constant (Pmode, stack_pointer_rtx, -size);
+	  insn = gen_rtx_SET (stack_pointer_rtx, insn);
+	  loongarch_set_frame_expr (insn);
+	}
+    }
+}
+
+/* Return nonzero if this function is known to have a null epilogue.
+   This allows the optimizer to omit jumps to jumps if no stack
+   was created.  */
+
+bool
+loongarch_can_use_return_insn (void)
+{
+  return reload_completed && cfun->machine->frame.total_size == 0;
+}
+
+/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
+   says which.  */
+
+void
+loongarch_expand_epilogue (bool sibcall_p)
+{
+  /* Split the frame into two.  STEP1 is the amount of stack we should
+     deallocate before restoring the registers.  STEP2 is the amount we
+     should deallocate afterwards.
+
+     Start off by assuming that no registers need to be restored.  */
+  struct loongarch_frame_info *frame = &cfun->machine->frame;
+  HOST_WIDE_INT step1 = frame->total_size;
+  HOST_WIDE_INT step2 = 0;
+  rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+  rtx insn;
+
+  /* We need to add memory barrier to prevent read from deallocated stack.  */
+  bool need_barrier_p
+    = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
+
+  if (!sibcall_p && loongarch_can_use_return_insn ())
+    {
+      emit_jump_insn (gen_return ());
+      return;
+    }
+
+  /* Move past any dynamic stack allocations.  */
+  if (cfun->calls_alloca)
+    {
+      /* Emit a barrier to prevent loads from a deallocated stack.  */
+      loongarch_emit_stack_tie ();
+      need_barrier_p = false;
+
+      rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
+      if (!IMM12_OPERAND (INTVAL (adjust)))
+	{
+	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
+	  adjust = LARCH_PROLOGUE_TEMP (Pmode);
+	}
+
+      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
+				       hard_frame_pointer_rtx,
+				       adjust));
+
+      rtx dwarf = NULL_RTX;
+      rtx minus_offset = NULL_RTX;
+      minus_offset = GEN_INT (-frame->hard_frame_pointer_offset);
+      rtx cfa_adjust_value = gen_rtx_PLUS (Pmode,
+					   hard_frame_pointer_rtx,
+					   minus_offset);
+
+      rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
+      dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      REG_NOTES (insn) = dwarf;
+    }
+
+  /* If we need to restore registers, deallocate as much stack as
+     possible in the second step without going out of range.  */
+  if ((frame->mask | frame->fmask) != 0)
+    {
+      step2 = loongarch_first_stack_step (frame);
+      step1 -= step2;
+    }
+
+  /* Set TARGET to BASE + STEP1.  */
+  if (step1 > 0)
+    {
+      /* Emit a barrier to prevent loads from a deallocated stack.  */
+      loongarch_emit_stack_tie ();
+      need_barrier_p = false;
+
+      /* Get an rtx for STEP1 that we can add to BASE.  */
+      rtx adjust = GEN_INT (step1);
+      if (!IMM12_OPERAND (step1))
+	{
+	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
+	  adjust = LARCH_PROLOGUE_TEMP (Pmode);
+	}
+
+      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
+				       stack_pointer_rtx,
+				       adjust));
+
+      rtx dwarf = NULL_RTX;
+      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					 GEN_INT (step2));
+
+      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      REG_NOTES (insn) = dwarf;
+    }
+
+  /* Restore the registers.  */
+  loongarch_for_each_saved_reg (frame->total_size - step2,
+				loongarch_restore_reg);
+
+  if (need_barrier_p)
+    loongarch_emit_stack_tie ();
+
+  /* Deallocate the final bit of the frame.  */
+  if (step2 > 0)
+    {
+      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
+				       stack_pointer_rtx,
+				       GEN_INT (step2)));
+
+      rtx dwarf = NULL_RTX;
+      rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx);
+      dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
+      RTX_FRAME_RELATED_P (insn) = 1;
+
+      REG_NOTES (insn) = dwarf;
+    }
+
+  /* Add in the __builtin_eh_return stack adjustment.  */
+  if (crtl->calls_eh_return)
+    emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+			      EH_RETURN_STACKADJ_RTX));
+
+  if (!sibcall_p)
+    emit_jump_insn (gen_simple_return_internal (ra));
+}
+
+#define LU32I_B (0xfffffULL << 32)
+#define LU52I_B (0xfffULL << 52)
+
+/* Fill CODES with a sequence of rtl operations to load VALUE.
+   Return the number of operations needed.  */
+
+static unsigned int
+loongarch_build_integer (struct loongarch_integer_op *codes,
+			 HOST_WIDE_INT value)
+
+{
+  unsigned int cost = 0;
+
+  /* Get the lower 32 bits of the value.  */
+  HOST_WIDE_INT low_part = TARGET_64BIT ? value << 32 >> 32 : value;
+
+  if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part))
+    {
+      /* The value of the lower 32 bit be loaded with one instruction.
+	 lu12i.w.  */
+      codes[0].code = UNKNOWN;
+      codes[0].method = METHOD_NORMAL;
+      codes[0].value = low_part;
+      cost++;
+    }
+  else
+    {
+      /* lu12i.w + ior.  */
+      codes[0].code = UNKNOWN;
+      codes[0].method = METHOD_NORMAL;
+      codes[0].value = low_part & ~(IMM_REACH - 1);
+      cost++;
+      HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1);
+      if (iorv != 0)
+	{
+	  codes[1].code = IOR;
+	  codes[1].method = METHOD_NORMAL;
+	  codes[1].value = iorv;
+	  cost++;
+	}
+    }
+
+  if (TARGET_64BIT)
+    {
+      bool lu32i[2] = {(value & LU32I_B) == 0, (value & LU32I_B) == LU32I_B};
+      bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B};
+
+      int sign31 = (value & (1UL << 31)) >> 31;
+      /* Determine whether the upper 32 bits are sign-extended from the lower
+	 32 bits. If it is, the instructions to load the high order can be
+	 ommitted.  */
+      if (lu32i[sign31] && lu52i[sign31])
+	return cost;
+      /* Determine whether bits 32-51 are sign-extended from the lower 32
+	 bits. If so, directly load 52-63 bits.  */
+      else if (lu32i[sign31])
+	{
+	  codes[cost].method = METHOD_LU52I;
+	  codes[cost].value = (value >> 52) << 52;
+	  return cost + 1;
+	}
+
+      codes[cost].method = METHOD_LU32I;
+      codes[cost].value = ((value << 12) >> 44) << 32;
+      cost++;
+
+      /* Determine whether the 52-61 bits are sign-extended from the low order,
+	 and if not, load the 52-61 bits.  */
+      if (!lu52i[(value & (1ULL << 51)) >> 51])
+	{
+	  codes[cost].method = METHOD_LU52I;
+	  codes[cost].value = (value >> 52) << 52;
+	  cost++;
+	}
+    }
+
+  gcc_assert (cost <= LARCH_MAX_INTEGER_OPS);
+
+  return cost;
+}
+
+/* Fill CODES with a sequence of rtl operations to load VALUE.
+   Return the number of operations needed.
+   Split interger in loongarch_output_move.  */
+
+static unsigned int
+loongarch_integer_cost (HOST_WIDE_INT value)
+{
+  struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS];
+  return loongarch_build_integer (codes, value);
+}
+
+/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */
+
+static bool
+loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+{
+  return loongarch_const_insns (x) > 0;
+}
+
+/* Return true if X is a thread-local symbol.  */
+
+static bool
+loongarch_tls_symbol_p (rtx x)
+{
+  return GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0;
+}
+
+/* Return true if SYMBOL_REF X is associated with a global symbol
+   (in the STB_GLOBAL sense).  */
+
+bool
+loongarch_global_symbol_p (const_rtx x)
+{
+  if (GET_CODE (x) == LABEL_REF)
+    return false;
+
+  const_tree decl = SYMBOL_REF_DECL (x);
+
+  if (!decl)
+    return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
+
+  /* Weakref symbols are not TREE_PUBLIC, but their targets are global
+     or weak symbols.  Relocations in the object file will be against
+     the target symbol, so it's that symbol's binding that matters here.  */
+  return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl));
+}
+
+bool
+loongarch_global_symbol_noweak_p (const_rtx x)
+{
+  if (GET_CODE (x) == LABEL_REF)
+    return false;
+
+  const_tree decl = SYMBOL_REF_DECL (x);
+
+  if (!decl)
+    return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
+
+  /* Weakref symbols are not TREE_PUBLIC, but their targets are global
+     or weak symbols.  Relocations in the object file will be against
+     the target symbol, so it's that symbol's binding that matters here.  */
+  return DECL_P (decl) && TREE_PUBLIC (decl);
+}
+
+bool
+loongarch_weak_symbol_p (const_rtx x)
+{
+  const_tree decl;
+  if (GET_CODE (x) == LABEL_REF || !(decl = SYMBOL_REF_DECL (x)))
+    return false;
+  return DECL_P (decl) && DECL_WEAK (decl);
+}
+
+/* Return true if SYMBOL_REF X binds locally.  */
+
+bool
+loongarch_symbol_binds_local_p (const_rtx x)
+{
+  if (GET_CODE (x) == LABEL_REF)
+    return false;
+
+  return (SYMBOL_REF_DECL (x) ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
+			      : SYMBOL_REF_LOCAL_P (x));
+}
+
+/* Return true if rtx constants of mode MODE should be put into a small
+   data section.  */
+
+static bool
+loongarch_rtx_constant_in_small_data_p (machine_mode mode)
+{
+  return (GET_MODE_SIZE (mode) <= g_switch_value);
+}
+
+/* Return the method that should be used to access SYMBOL_REF or
+   LABEL_REF X in context CONTEXT.  */
+
+static enum loongarch_symbol_type
+loongarch_classify_symbol (const_rtx x,
+			   enum loongarch_symbol_context context  \
+			   ATTRIBUTE_UNUSED)
+{
+  if (GET_CODE (x) == LABEL_REF)
+    {
+      return SYMBOL_GOT_DISP;
+    }
+
+  gcc_assert (GET_CODE (x) == SYMBOL_REF);
+
+  if (SYMBOL_REF_TLS_MODEL (x))
+    return SYMBOL_TLS;
+
+  if (GET_CODE (x) == SYMBOL_REF)
+    return SYMBOL_GOT_DISP;
+
+  return SYMBOL_GOT_DISP;
+}
+
+/* Return true if X is a symbolic constant that can be used in context
+   CONTEXT.  If it is, store the type of the symbol in *SYMBOL_TYPE.  */
+
+bool
+loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_context context,
+			       enum loongarch_symbol_type *symbol_type)
+{
+  rtx offset;
+
+  split_const (x, &x, &offset);
+  if (UNSPEC_ADDRESS_P (x))
+    {
+      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
+      x = UNSPEC_ADDRESS (x);
+    }
+  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
+    {
+      *symbol_type = loongarch_classify_symbol (x, context);
+      if (*symbol_type == SYMBOL_TLS)
+	return true;
+    }
+  else
+    return false;
+
+  if (offset == const0_rtx)
+    return true;
+
+  /* Check whether a nonzero offset is valid for the underlying
+     relocations.  */
+  switch (*symbol_type)
+    {
+      /* Fall through.  */
+
+    case SYMBOL_GOT_DISP:
+    case SYMBOL_TLSGD:
+    case SYMBOL_TLSLDM:
+    case SYMBOL_TLS:
+      return false;
+    }
+  gcc_unreachable ();
+}
+
+/* Like loongarch_symbol_insns We rely on the fact that, in the worst case.  */
+
+static int
+loongarch_symbol_insns_1 (enum loongarch_symbol_type type, machine_mode mode)
+{
+  switch (type)
+    {
+    case SYMBOL_GOT_DISP:
+      /* The constant will have to be loaded from the GOT before it
+	 is used in an address.  */
+      if (mode != MAX_MACHINE_MODE)
+	return 0;
+
+      /* Fall through.  */
+
+      return 3;
+
+    case SYMBOL_TLSGD:
+    case SYMBOL_TLSLDM:
+      return 1;
+
+    case SYMBOL_TLS:
+      /* We don't treat a bare TLS symbol as a constant.  */
+      return 0;
+    }
+  gcc_unreachable ();
+}
+
+/* If MODE is MAX_MACHINE_MODE, return the number of instructions needed
+   to load symbols of type TYPE into a register.  Return 0 if the given
+   type of symbol cannot be used as an immediate operand.
+
+   Otherwise, return the number of instructions needed to load or store
+   values of mode MODE to or from addresses of type TYPE.  Return 0 if
+   the given type of symbol is not valid in addresses.
+
+   In both cases, instruction counts are based off BASE_INSN_LENGTH.  */
+
+static int
+loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
+{
+  return loongarch_symbol_insns_1 (type, mode) * (1);
+}
+
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
+
+static bool
+loongarch_cannot_force_const_mem (machine_mode mode, rtx x)
+{
+  enum loongarch_symbol_type type;
+  rtx base, offset;
+
+  /* As an optimization, reject constants that loongarch_legitimize_move
+     can expand inline.
+
+     Suppose we have a multi-instruction sequence that loads constant C
+     into register R.  If R does not get allocated a hard register, and
+     R is used in an operand that allows both registers and memory
+     references, reload will consider forcing C into memory and using
+     one of the instruction's memory alternatives.  Returning false
+     here will force it to use an input reload instead.  */
+  if (CONST_INT_P (x) && loongarch_legitimate_constant_p (mode, x))
+    return true;
+
+  split_const (x, &base, &offset);
+  if (loongarch_symbolic_constant_p (base, SYMBOL_CONTEXT_LEA, &type))
+    {
+      /* The same optimization as for CONST_INT.  */
+      if (IMM12_INT (offset)
+	  && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0)
+	return true;
+    }
+
+  /* TLS symbols must be computed by loongarch_legitimize_move.  */
+  if (tls_referenced_p (x))
+    return true;
+
+  return false;
+}
+
+/* Return true if register REGNO is a valid base register for mode MODE.
+   STRICT_P is true if REG_OK_STRICT is in effect.  */
+
+int
+loongarch_regno_mode_ok_for_base_p (int regno,
+				    machine_mode mode ATTRIBUTE_UNUSED,
+				    bool strict_p)
+{
+  if (!HARD_REGISTER_NUM_P (regno))
+    {
+      if (!strict_p)
+	return true;
+      regno = reg_renumber[regno];
+    }
+
+  /* These fake registers will be eliminated to either the stack or
+     hard frame pointer, both of which are usually valid base registers.
+     Reload deals with the cases where the eliminated form isn't valid.  */
+  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
+    return true;
+
+  return GP_REG_P (regno);
+}
+
+/* Return true if X is a valid base register for mode MODE.
+   STRICT_P is true if REG_OK_STRICT is in effect.  */
+
+static bool
+loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
+{
+  if (!strict_p && GET_CODE (x) == SUBREG)
+    x = SUBREG_REG (x);
+
+  return (REG_P (x)
+	  && loongarch_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
+}
+
+/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
+   can address a value of mode MODE.  */
+
+static bool
+loongarch_valid_offset_p (rtx x, machine_mode mode)
+{
+  /* Check that X is a signed 12-bit number,
+   * or check that X is a signed 16-bit number
+   * and offset 4 byte aligned.  */
+  if (!(const_arith_operand (x, Pmode)
+	|| ((mode == E_SImode || mode == E_DImode)
+	    && const_imm16_operand (x, Pmode)
+	    && (loongarch_signed_immediate_p (INTVAL (x), 14, 2)))))
+    return false;
+
+  /* We may need to split multiword moves, so make sure that every word
+     is accessible.  */
+  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+      && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
+    return false;
+
+  return true;
+}
+
+static bool
+loongarch_valid_index_p (struct loongarch_address_info *info, rtx x,
+			  machine_mode mode, bool strict_p)
+{
+  rtx index;
+
+  if ((REG_P (x) || SUBREG_P (x))
+      && GET_MODE (x) == Pmode)
+    {
+      index = x;
+    }
+  else
+    return false;
+
+  if (!strict_p
+      && GET_CODE (index) == SUBREG
+      && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
+    index = SUBREG_REG (index);
+
+  if (loongarch_valid_base_register_p (index, mode, strict_p))
+    {
+      info->type = ADDRESS_REG_REG;
+      info->offset = index;
+      return true;
+    }
+
+  return false;
+}
+
+/* Return true if X is a valid address for machine mode MODE.  If it is,
+   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
+   effect.  */
+
+static bool
+loongarch_classify_address (struct loongarch_address_info *info, rtx x,
+			    machine_mode mode, bool strict_p)
+{
+  switch (GET_CODE (x))
+    {
+    case REG:
+    case SUBREG:
+      info->type = ADDRESS_REG;
+      info->reg = x;
+      info->offset = const0_rtx;
+      return loongarch_valid_base_register_p (info->reg, mode, strict_p);
+
+    case PLUS:
+      if (loongarch_valid_base_register_p (XEXP (x, 0), mode, strict_p)
+	  && loongarch_valid_index_p (info, XEXP (x, 1), mode, strict_p))
+	{
+	  info->reg = XEXP (x, 0);
+	  return true;
+	}
+
+      if (loongarch_valid_base_register_p (XEXP (x, 1), mode, strict_p)
+	 && loongarch_valid_index_p (info, XEXP (x, 0), mode, strict_p))
+	{
+	  info->reg = XEXP (x, 1);
+	  return true;
+	}
+
+      info->type = ADDRESS_REG;
+      info->reg = XEXP (x, 0);
+      info->offset = XEXP (x, 1);
+      return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
+	      && loongarch_valid_offset_p (info->offset, mode));
+    default:
+      return false;
+    }
+}
+
+/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
+
+static bool
+loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
+{
+  struct loongarch_address_info addr;
+
+  return loongarch_classify_address (&addr, x, mode, strict_p);
+}
+
+/* Return true if ADDR matches the pattern for the indexed address
+   instruction.  */
+
+static bool
+loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED)
+{
+  if (GET_CODE (addr) != PLUS
+      || !REG_P (XEXP (addr, 0))
+      || !REG_P (XEXP (addr, 1)))
+    return false;
+  return true;
+}
+
+/* Return the number of instructions needed to load or store a value
+   of mode MODE at address X, assuming that BASE_INSN_LENGTH is the
+   length of one instruction.  Return 0 if X isn't valid for MODE.
+   Assume that multiword moves may need to be split into word moves
+   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
+   enough.  */
+
+int
+loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
+{
+  struct loongarch_address_info addr;
+  int factor;
+
+  if (!loongarch_classify_address (&addr, x, mode, false))
+    return 0;
+
+  /* BLKmode is used for single unaligned loads and stores and should
+     not count as a multiword mode.  (GET_MODE_SIZE (BLKmode) is pretty
+     meaningless, so we have to single it out as a special case one way
+     or the other.)  */
+  if (mode != BLKmode && might_split_p)
+    factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  else
+    factor = 1;
+
+  if (loongarch_classify_address (&addr, x, mode, false))
+    switch (addr.type)
+      {
+      case ADDRESS_REG:
+	return factor;
+
+      case ADDRESS_REG_REG:
+	return factor;
+
+      case ADDRESS_CONST_INT:
+	return factor;
+
+      case ADDRESS_SYMBOLIC:
+	return factor * loongarch_symbol_insns (addr.symbol_type, mode);
+      }
+  return 0;
+}
+
+/* Return true if X fits within an unsigned field of BITS bits that is
+   shifted left SHIFT bits before being used.  */
+
+bool
+loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits,
+				int shift = 0)
+{
+  return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits));
+}
+
+/* Return true if X fits within a signed field of BITS bits that is
+   shifted left SHIFT bits before being used.  */
+
+bool
+loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits,
+			      int shift = 0)
+{
+  x += 1 << (bits + shift - 1);
+  return loongarch_unsigned_immediate_p (x, bits, shift);
+}
+
+/* Return true if X is a legitimate address with a 12-bit offset.
+   MODE is the mode of the value being accessed.  */
+
+bool
+loongarch_12bit_offset_address_p (rtx x, machine_mode mode)
+{
+  struct loongarch_address_info addr;
+
+  return (loongarch_classify_address (&addr, x, mode, false)
+	  && addr.type == ADDRESS_REG
+	  && CONST_INT_P (addr.offset)
+	  && LARCH_U12BIT_OFFSET_P (INTVAL (addr.offset)));
+}
+
+/* Return true if X is a legitimate address with a 14-bit offset shifted 2.
+   MODE is the mode of the value being accessed.  */
+
+bool
+loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode)
+{
+  struct loongarch_address_info addr;
+
+  return (loongarch_classify_address (&addr, x, mode, false)
+	  && addr.type == ADDRESS_REG
+	  && CONST_INT_P (addr.offset)
+	  && LARCH_16BIT_OFFSET_P (INTVAL (addr.offset))
+	  && LARCH_SHIFT_2_OFFSET_P (INTVAL (addr.offset)));
+}
+
+/* Return the number of instructions needed to load constant X,
+   assuming that BASE_INSN_LENGTH is the length of one instruction.
+   Return 0 if X isn't a valid constant.  */
+
+int
+loongarch_const_insns (rtx x)
+{
+  enum loongarch_symbol_type symbol_type;
+  rtx offset;
+
+  switch (GET_CODE (x))
+    {
+    case CONST_INT:
+      return loongarch_integer_cost (INTVAL (x));
+
+    case CONST_VECTOR:
+      /* Fall through.  */
+    case CONST_DOUBLE:
+      /* Allow zeros for normal mode, where we can use $0.  */
+      return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+
+    case CONST:
+      /* See if we can refer to X directly.  */
+      if (loongarch_symbolic_constant_p (x, SYMBOL_CONTEXT_LEA, &symbol_type))
+	return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE);
+
+      /* Otherwise try splitting the constant into a base and offset.
+	 If the offset is a 12-bit value, we can load the base address
+	 into a register and then use ADDI.{W/D} to add in the offset.
+	 If the offset is larger, we can load the base and offset
+	 into separate registers and add them together with ADD.{W/D}.
+	 However, the latter is only possible before reload; during
+	 and after reload, we must have the option of forcing the
+	 constant into the pool instead.  */
+      split_const (x, &x, &offset);
+      if (offset != 0)
+	{
+	  int n = loongarch_const_insns (x);
+	  if (n != 0)
+	    {
+	      if (IMM12_INT (offset))
+		return n + 1;
+	      else if (!targetm.cannot_force_const_mem (GET_MODE (x), x))
+		return n + 1 + loongarch_integer_cost (INTVAL (offset));
+	    }
+	}
+      return 0;
+
+    case SYMBOL_REF:
+    case LABEL_REF:
+      return loongarch_symbol_insns (
+	loongarch_classify_symbol (x, SYMBOL_CONTEXT_LEA), MAX_MACHINE_MODE);
+
+    default:
+      return 0;
+    }
+}
+
+/* X is a doubleword constant that can be handled by splitting it into
+   two words and loading each word separately.  Return the number of
+   instructions required to do this, assuming that BASE_INSN_LENGTH
+   is the length of one instruction.  */
+
+int
+loongarch_split_const_insns (rtx x)
+{
+  unsigned int low, high;
+
+  low = loongarch_const_insns (loongarch_subword (x, false));
+  high = loongarch_const_insns (loongarch_subword (x, true));
+  gcc_assert (low > 0 && high > 0);
+  return low + high;
+}
+
+/* Return the number of instructions needed to implement INSN,
+   given that it loads from or stores to MEM.  Assume that
+   BASE_INSN_LENGTH is the length of one instruction.  */
+
+int
+loongarch_load_store_insns (rtx mem, rtx_insn *insn)
+{
+  machine_mode mode;
+  bool might_split_p;
+  rtx set;
+
+  gcc_assert (MEM_P (mem));
+  mode = GET_MODE (mem);
+
+  /* Try to prove that INSN does not need to be split.  */
+  might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD;
+  if (might_split_p)
+    {
+      set = single_set (insn);
+      if (set
+	  && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set),
+					   insn))
+	might_split_p = false;
+    }
+
+  return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p);
+}
+
+/* Return the number of instructions needed for an integer division,
+   assuming that BASE_INSN_LENGTH is the length of one instruction.  */
+
+int
+loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED)
+{
+  int count;
+
+  count = 1;
+  if (TARGET_CHECK_ZERO_DIV)
+    count += 2;
+
+  return count;
+}
+
+/* Emit an instruction of the form (set TARGET (CODE OP0 OP1)).  */
+
+void
+loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1)
+{
+  emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (code, GET_MODE (target),
+						  op0, op1)));
+}
+
+/* Compute (CODE OP0 OP1) and store the result in a new register
+   of mode MODE.  Return that new register.  */
+
+static rtx
+loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0,
+			rtx op1)
+{
+  rtx reg;
+
+  reg = gen_reg_rtx (mode);
+  loongarch_emit_binary (code, reg, op0, op1);
+  return reg;
+}
+
+/* Copy VALUE to a register and return that register.  If new pseudos
+   are allowed, copy it into a new register, otherwise use DEST.  */
+
+static rtx
+loongarch_force_temporary (rtx dest, rtx value)
+{
+  if (can_create_pseudo_p ())
+    return force_reg (Pmode, value);
+  else
+    {
+      loongarch_emit_move (dest, value);
+      return dest;
+    }
+}
+
+/* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
+   then add CONST_INT OFFSET to the result.  */
+
+static rtx
+loongarch_unspec_address_offset (rtx base, rtx offset,
+				 enum loongarch_symbol_type symbol_type)
+{
+  base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
+			 UNSPEC_ADDRESS_FIRST + symbol_type);
+  if (offset != const0_rtx)
+    base = gen_rtx_PLUS (Pmode, base, offset);
+  return gen_rtx_CONST (Pmode, base);
+}
+
+/* Return an UNSPEC address with underlying address ADDRESS and symbol
+   type SYMBOL_TYPE.  */
+
+rtx
+loongarch_unspec_address (rtx address, enum loongarch_symbol_type symbol_type)
+{
+  rtx base, offset;
+
+  split_const (address, &base, &offset);
+  return loongarch_unspec_address_offset (base, offset, symbol_type);
+}
+
+/* If OP is an UNSPEC address, return the address to which it refers,
+   otherwise return OP itself.  */
+
+rtx
+loongarch_strip_unspec_address (rtx op)
+{
+  rtx base, offset;
+
+  split_const (op, &base, &offset);
+  if (UNSPEC_ADDRESS_P (base))
+    op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
+  return op;
+}
+
+/* Return a legitimate address for REG + OFFSET.  TEMP is as for
+   loongarch_force_temporary; it is only needed when OFFSET is not a
+   IMM12_OPERAND.  */
+
+static rtx
+loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
+{
+  if (!IMM12_OPERAND (offset))
+    {
+      rtx high;
+
+      /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
+	 The addition inside the macro CONST_HIGH_PART may cause an
+	 overflow, so we need to force a sign-extension check.  */
+      high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
+      offset = CONST_LOW_PART (offset);
+      high = loongarch_force_temporary (temp, high);
+      reg = loongarch_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
+    }
+  return plus_constant (Pmode, reg, offset);
+}
+
+/* The __tls_get_attr symbol.  */
+static GTY (()) rtx loongarch_tls_symbol;
+
+/* Load an entry from the GOT for a TLS GD access.  */
+
+static rtx
+loongarch_got_load_tls_gd (rtx dest, rtx sym)
+{
+  if (Pmode == DImode)
+    return gen_got_load_tls_gddi (dest, sym);
+  else
+    return gen_got_load_tls_gdsi (dest, sym);
+}
+
+/* Load an entry from the GOT for a TLS LD access.  */
+
+static rtx
+loongarch_got_load_tls_ld (rtx dest, rtx sym)
+{
+  if (Pmode == DImode)
+    return gen_got_load_tls_lddi (dest, sym);
+  else
+    return gen_got_load_tls_ldsi (dest, sym);
+}
+
+/* Load an entry from the GOT for a TLS IE access.  */
+
+static rtx
+loongarch_got_load_tls_ie (rtx dest, rtx sym)
+{
+  if (Pmode == DImode)
+    return gen_got_load_tls_iedi (dest, sym);
+  else
+    return gen_got_load_tls_iesi (dest, sym);
+}
+
+/* Add in the thread pointer for a TLS LE access.  */
+
+static rtx
+loongarch_got_load_tls_le (rtx dest, rtx sym)
+{
+  if (Pmode == DImode)
+    return gen_got_load_tls_ledi (dest, sym);
+  else
+    return gen_got_load_tls_lesi (dest, sym);
+}
+
+/* Return an instruction sequence that calls __tls_get_addr.  SYM is
+   the TLS symbol we are referencing and TYPE is the symbol type to use
+   (either global dynamic or local dynamic).  V0 is an RTX for the
+   return value location.  */
+
+static rtx_insn *
+loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
+{
+  rtx loc, a0;
+  rtx_insn *insn;
+
+  a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
+
+  if (!loongarch_tls_symbol)
+    loongarch_tls_symbol = init_one_libfunc ("__tls_get_addr");
+
+  loc = loongarch_unspec_address (sym, type);
+
+  start_sequence ();
+
+  if (type == SYMBOL_TLSLDM)
+    emit_insn (loongarch_got_load_tls_ld (a0, loc));
+  else if (type == SYMBOL_TLSGD)
+    emit_insn (loongarch_got_load_tls_gd (a0, loc));
+  else
+    gcc_unreachable ();
+
+  insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
+						  const0_rtx));
+  RTL_CONST_CALL_P (insn) = 1;
+  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
+  insn = get_insns ();
+
+  end_sequence ();
+
+  return insn;
+}
+
+/* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
+   its address.  The return value will be both a valid address and a valid
+   SET_SRC (either a REG or a LO_SUM).  */
+
+static rtx
+loongarch_legitimize_tls_address (rtx loc)
+{
+  rtx dest, tp, tmp;
+  enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
+  rtx_insn *insn;
+
+  switch (model)
+    {
+    case TLS_MODEL_LOCAL_DYNAMIC:
+      tmp = gen_rtx_REG (Pmode, GP_RETURN);
+      dest = gen_reg_rtx (Pmode);
+      insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp);
+      emit_libcall_block (insn, dest, tmp, loc);
+      break;
+
+    case TLS_MODEL_GLOBAL_DYNAMIC:
+      tmp = gen_rtx_REG (Pmode, GP_RETURN);
+      dest = gen_reg_rtx (Pmode);
+      insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp);
+      emit_libcall_block (insn, dest, tmp, loc);
+      break;
+
+    case TLS_MODEL_INITIAL_EXEC:
+      /* la.tls.ie; tp-relative add  */
+      tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
+      tmp = gen_reg_rtx (Pmode);
+      emit_insn (loongarch_got_load_tls_ie (tmp, loc));
+      dest = gen_reg_rtx (Pmode);
+      emit_insn (gen_add3_insn (dest, tmp, tp));
+      break;
+
+    case TLS_MODEL_LOCAL_EXEC:
+      /* la.tls.le; tp-relative add  */
+      tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
+      tmp = gen_reg_rtx (Pmode);
+      emit_insn (loongarch_got_load_tls_le (tmp, loc));
+      dest = gen_reg_rtx (Pmode);
+      emit_insn (gen_add3_insn (dest, tmp, tp));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+  return dest;
+}
+
+rtx
+loongarch_legitimize_call_address (rtx addr)
+{
+  if (!call_insn_operand (addr, VOIDmode))
+    {
+      rtx reg = gen_reg_rtx (Pmode);
+      loongarch_emit_move (reg, addr);
+      return reg;
+    }
+  return addr;
+}
+
+/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
+   and *OFFSET_PTR.  Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise.  */
+
+static void
+loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr)
+{
+  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
+    {
+      *base_ptr = XEXP (x, 0);
+      *offset_ptr = INTVAL (XEXP (x, 1));
+    }
+  else
+    {
+      *base_ptr = x;
+      *offset_ptr = 0;
+    }
+}
+
+/* If X is not a valid address for mode MODE, force it into a register.  */
+
+static rtx
+loongarch_force_address (rtx x, machine_mode mode)
+{
+  if (!loongarch_legitimate_address_p (mode, x, false))
+    x = force_reg (Pmode, x);
+  return x;
+}
+
+/* This function is used to implement LEGITIMIZE_ADDRESS.  If X can
+   be legitimized in a way that the generic machinery might not expect,
+   return a new address, otherwise return NULL.  MODE is the mode of
+   the memory being accessed.  */
+
+static rtx
+loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+			      machine_mode mode)
+{
+  rtx base, addr;
+  HOST_WIDE_INT offset;
+
+  if (loongarch_tls_symbol_p (x))
+    return loongarch_legitimize_tls_address (x);
+
+  /* Handle BASE + OFFSET using loongarch_add_offset.  */
+  loongarch_split_plus (x, &base, &offset);
+  if (offset != 0)
+    {
+      if (!loongarch_valid_base_register_p (base, mode, false))
+	base = copy_to_mode_reg (Pmode, base);
+      addr = loongarch_add_offset (NULL, base, offset);
+      return loongarch_force_address (addr, mode);
+    }
+
+  return x;
+}
+
+/* Load VALUE into DEST.  TEMP is as for loongarch_force_temporary.  */
+
+void
+loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
+{
+  struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS];
+  machine_mode mode;
+  unsigned int i, num_ops;
+  rtx x;
+
+  mode = GET_MODE (dest);
+  num_ops = loongarch_build_integer (codes, value);
+
+  /* Apply each binary operation to X.  Invariant: X is a legitimate
+     source operand for a SET pattern.  */
+  x = GEN_INT (codes[0].value);
+  for (i = 1; i < num_ops; i++)
+    {
+      if (!can_create_pseudo_p ())
+	{
+	  emit_insn (gen_rtx_SET (temp, x));
+	  x = temp;
+	}
+      else
+	x = force_reg (mode, x);
+
+      switch (codes[i].method)
+	{
+	case METHOD_NORMAL:
+	  x = gen_rtx_fmt_ee (codes[i].code, mode, x,
+			      GEN_INT (codes[i].value));
+	  break;
+	case METHOD_LU32I:
+	  emit_insn (
+	    gen_rtx_SET (x,
+			 gen_rtx_IOR (DImode,
+				      gen_rtx_ZERO_EXTEND (
+					DImode, gen_rtx_SUBREG (SImode, x, 0)),
+				      GEN_INT (codes[i].value))));
+	  break;
+	case METHOD_LU52I:
+	  emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff),
+				  GEN_INT (codes[i].value)));
+	  break;
+	case METHOD_INSV:
+	  emit_insn (
+	    gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20),
+					       GEN_INT (32)),
+			 gen_rtx_REG (DImode, 0)));
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  emit_insn (gen_rtx_SET (dest, x));
+}
+
+/* Subroutine of loongarch_legitimize_move.  Move constant SRC into register
+   DEST given that SRC satisfies immediate_operand but doesn't satisfy
+   move_operand.  */
+
+static void
+loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
+{
+  rtx base, offset;
+
+  /* Split moves of big integers into smaller pieces.  */
+  if (splittable_const_int_operand (src, mode))
+    {
+      loongarch_move_integer (dest, dest, INTVAL (src));
+      return;
+    }
+
+  /* Generate the appropriate access sequences for TLS symbols.  */
+  if (loongarch_tls_symbol_p (src))
+    {
+      loongarch_emit_move (dest, loongarch_legitimize_tls_address (src));
+      return;
+    }
+
+  /* If we have (const (plus symbol offset)), and that expression cannot
+     be forced into memory, load the symbol first and add in the offset.
+     prefer to do this even if the constant _can_ be forced into memory,
+     as it usually produces better code.  */
+  split_const (src, &base, &offset);
+  if (offset != const0_rtx
+      && (targetm.cannot_force_const_mem (mode, src)
+	  || (can_create_pseudo_p ())))
+    {
+      base = loongarch_force_temporary (dest, base);
+      loongarch_emit_move (dest,
+			   loongarch_add_offset (NULL, base, INTVAL (offset)));
+      return;
+    }
+
+  src = force_const_mem (mode, src);
+
+  loongarch_emit_move (dest, src);
+}
+
+/* If (set DEST SRC) is not a valid move instruction, emit an equivalent
+   sequence that is valid.  */
+
+bool
+loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src)
+{
+  if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
+    {
+      loongarch_emit_move (dest, force_reg (mode, src));
+      return true;
+    }
+
+  /* Both src and dest are non-registers;  one special case is supported where
+     the source is (const_int 0) and the store can source the zero register.
+     */
+  if (!register_operand (dest, mode) && !register_operand (src, mode)
+      && !const_0_operand (src, mode))
+    {
+      loongarch_emit_move (dest, force_reg (mode, src));
+      return true;
+    }
+
+  /* We need to deal with constants that would be legitimate
+     immediate_operands but aren't legitimate move_operands.  */
+  if (CONSTANT_P (src) && !move_operand (src, mode))
+    {
+      loongarch_legitimize_const_move (mode, dest, src);
+      set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
+      return true;
+    }
+
+  return false;
+}
+
+/* Return true if OP refers to small data symbols directly, not through
+   a LO_SUM.  CONTEXT is the context in which X appears.  */
+
+static int
+loongarch_small_data_pattern_1 (rtx x,
+				enum loongarch_symbol_context context \
+				ATTRIBUTE_UNUSED)
+{
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
+    {
+      rtx x = *iter;
+
+      /* We make no particular guarantee about which symbolic constants are
+	 acceptable as asm operands versus which must be forced into a GPR.  */
+      if (GET_CODE (x) == ASM_OPERANDS)
+	iter.skip_subrtxes ();
+      else if (MEM_P (x))
+	{
+	  if (loongarch_small_data_pattern_1 (XEXP (x, 0), SYMBOL_CONTEXT_MEM))
+	    return true;
+	  iter.skip_subrtxes ();
+	}
+    }
+  return false;
+}
+
+/* Return true if OP refers to small data symbols directly, not through
+   a LO_SUM.  */
+
+bool
+loongarch_small_data_pattern_p (rtx op)
+{
+  return loongarch_small_data_pattern_1 (op, SYMBOL_CONTEXT_LEA);
+}
+
+/* Rewrite *LOC so that it refers to small data using explicit
+   relocations.  CONTEXT is the context in which *LOC appears.  */
+
+static void
+loongarch_rewrite_small_data_1 (rtx *loc,
+				enum loongarch_symbol_context context \
+				ATTRIBUTE_UNUSED)
+{
+  subrtx_ptr_iterator::array_type array;
+  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
+    {
+      rtx *loc = *iter;
+      if (MEM_P (*loc))
+	{
+	  loongarch_rewrite_small_data_1 (&XEXP (*loc, 0), SYMBOL_CONTEXT_MEM);
+	  iter.skip_subrtxes ();
+	}
+    }
+}
+
+/* Rewrite instruction pattern PATTERN so that it refers to small data
+   using explicit relocations.  */
+
+rtx
+loongarch_rewrite_small_data (rtx pattern)
+{
+  pattern = copy_insn (pattern);
+  loongarch_rewrite_small_data_1 (&pattern, SYMBOL_CONTEXT_LEA);
+  return pattern;
+}
+
+/* The cost of loading values from the constant pool.  It should be
+   larger than the cost of any constant we want to synthesize inline.  */
+#define CONSTANT_POOL_COST COSTS_N_INSNS (8)
+
+/* Return true if there is a instruction that implements CODE
+   and if that instruction accepts X as an immediate operand.  */
+
+static int
+loongarch_immediate_operand_p (int code, HOST_WIDE_INT x)
+{
+  switch (code)
+    {
+    case ASHIFT:
+    case ASHIFTRT:
+    case LSHIFTRT:
+      /* All shift counts are truncated to a valid constant.  */
+      return true;
+
+    case ROTATE:
+    case ROTATERT:
+      /* Likewise rotates, if the target supports rotates at all.  */
+      return true;
+
+    case AND:
+    case IOR:
+    case XOR:
+      /* These instructions take 12-bit unsigned immediates.  */
+      return IMM12_OPERAND_UNSIGNED (x);
+
+    case PLUS:
+    case LT:
+    case LTU:
+      /* These instructions take 12-bit signed immediates.  */
+      return IMM12_OPERAND (x);
+
+    case EQ:
+    case NE:
+    case GT:
+    case GTU:
+      /* The "immediate" forms of these instructions are really
+	 implemented as comparisons with register 0.  */
+      return x == 0;
+
+    case GE:
+    case GEU:
+      /* Likewise, meaning that the only valid immediate operand is 1.  */
+      return x == 1;
+
+    case LE:
+      /* We add 1 to the immediate and use SLT.  */
+      return IMM12_OPERAND (x + 1);
+
+    case LEU:
+      /* Likewise SLTU, but reject the always-true case.  */
+      return IMM12_OPERAND (x + 1) && x + 1 != 0;
+
+    case SIGN_EXTRACT:
+    case ZERO_EXTRACT:
+      /* The bit position and size are immediate operands.  */
+      return 1;
+
+    default:
+      /* By default assume that $0 can be used for 0.  */
+      return x == 0;
+    }
+}
+
+/* Return the cost of binary operation X, given that the instruction
+   sequence for a word-sized or smaller operation has cost SINGLE_COST
+   and that the sequence of a double-word operation has cost DOUBLE_COST.
+   If SPEED is true, optimize for speed otherwise optimize for size.  */
+
+static int
+loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed)
+{
+  int cost;
+
+  if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2)
+    cost = double_cost;
+  else
+    cost = single_cost;
+  return (cost
+	  + set_src_cost (XEXP (x, 0), GET_MODE (x), speed)
+	  + rtx_cost (XEXP (x, 1), GET_MODE (x), GET_CODE (x), 1, speed));
+}
+
+/* Return the cost of floating-point multiplications of mode MODE.  */
+
+static int
+loongarch_fp_mult_cost (machine_mode mode)
+{
+  return mode == DFmode ? loongarch_cost->fp_mult_df
+			: loongarch_cost->fp_mult_sf;
+}
+
+/* Return the cost of floating-point divisions of mode MODE.  */
+
+static int
+loongarch_fp_div_cost (machine_mode mode)
+{
+  return mode == DFmode ? loongarch_cost->fp_div_df
+			: loongarch_cost->fp_div_sf;
+}
+
+/* Return the cost of sign-extending OP to mode MODE, not including the
+   cost of OP itself.  */
+
+static int
+loongarch_sign_extend_cost (machine_mode mode, rtx op)
+{
+  if (MEM_P (op))
+    /* Extended loads are as cheap as unextended ones.  */
+    return 0;
+
+  if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode)
+    /* A sign extension from SImode to DImode in 64-bit mode is free.  */
+    return 0;
+
+  return COSTS_N_INSNS (1);
+}
+
+/* Return the cost of zero-extending OP to mode MODE, not including the
+   cost of OP itself.  */
+
+static int
+loongarch_zero_extend_cost (machine_mode mode, rtx op)
+{
+  if (MEM_P (op))
+    /* Extended loads are as cheap as unextended ones.  */
+    return 0;
+
+  if (TARGET_64BIT && mode == DImode && GET_MODE (op) == SImode)
+    /* We need a shift left by 32 bits and a shift right by 32 bits.  */
+    return COSTS_N_INSNS (2);
+
+  /* We can use ANDI.  */
+  return COSTS_N_INSNS (1);
+}
+
+/* Return the cost of moving between two registers of mode MODE,
+   assuming that the move will be in pieces of at most UNITS bytes.  */
+
+static int
+loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units)
+{
+  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
+}
+
+/* Return the cost of moving between two registers of mode MODE.  */
+
+static int
+loongarch_set_reg_reg_cost (machine_mode mode)
+{
+  switch (GET_MODE_CLASS (mode))
+    {
+    case MODE_CC:
+      return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode));
+
+    case MODE_FLOAT:
+    case MODE_COMPLEX_FLOAT:
+    case MODE_VECTOR_FLOAT:
+      if (TARGET_HARD_FLOAT)
+	return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE);
+      /* Fall through.  */
+
+    default:
+      return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD);
+    }
+}
+
+/* Implement TARGET_RTX_COSTS.  */
+
+static bool
+loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
+		     int opno ATTRIBUTE_UNUSED, int *total, bool speed)
+{
+  int code = GET_CODE (x);
+  bool float_mode_p = FLOAT_MODE_P (mode);
+  int cost;
+  rtx addr;
+
+  if (outer_code == COMPARE)
+    {
+      gcc_assert (CONSTANT_P (x));
+      *total = 0;
+      return true;
+    }
+
+  switch (code)
+    {
+    case CONST_INT:
+      if (TARGET_64BIT && outer_code == AND && UINTVAL (x) == 0xffffffff)
+	{
+	  *total = 0;
+	  return true;
+	}
+
+      /* When not optimizing for size, we care more about the cost
+	 of hot code, and hot code is often in a loop.  If a constant
+	 operand needs to be forced into a register, we will often be
+	 able to hoist the constant load out of the loop, so the load
+	 should not contribute to the cost.  */
+      if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x)))
+	{
+	  *total = 0;
+	  return true;
+	}
+      /* Fall through.  */
+
+    case CONST:
+    case SYMBOL_REF:
+    case LABEL_REF:
+    case CONST_DOUBLE:
+      cost = loongarch_const_insns (x);
+      if (cost > 0)
+	{
+	  if (cost == 1 && outer_code == SET
+	      && !(float_mode_p && TARGET_HARD_FLOAT))
+	    cost = 0;
+	  else if ((outer_code == SET || GET_MODE (x) == VOIDmode))
+	    cost = 1;
+	  *total = COSTS_N_INSNS (cost);
+	  return true;
+	}
+      /* The value will need to be fetched from the constant pool.  */
+      *total = CONSTANT_POOL_COST;
+      return true;
+
+    case MEM:
+      /* If the address is legitimate, return the number of
+	 instructions it needs.  */
+      addr = XEXP (x, 0);
+      /* Check for a scaled indexed address.  */
+      if (loongarch_index_address_p (addr, mode))
+	{
+	  *total = COSTS_N_INSNS (2);
+	  return true;
+	}
+      cost = loongarch_address_insns (addr, mode, true);
+      if (cost > 0)
+	{
+	  *total = COSTS_N_INSNS (cost + 1);
+	  return true;
+	}
+      /* Otherwise use the default handling.  */
+      return false;
+
+    case FFS:
+      *total = COSTS_N_INSNS (6);
+      return false;
+
+    case NOT:
+      *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1);
+      return false;
+
+    case AND:
+      /* Check for a *clear_upper32 pattern and treat it like a zero
+	 extension.  See the pattern's comment for details.  */
+      if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1))
+	  && UINTVAL (XEXP (x, 1)) == 0xffffffff)
+	{
+	  *total = (loongarch_zero_extend_cost (mode, XEXP (x, 0))
+		    + set_src_cost (XEXP (x, 0), mode, speed));
+	  return true;
+	}
+      /* (AND (NOT op0) (NOT op1) is a nor operation that can be done in
+	 a single instruction.  */
+      if (GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT)
+	{
+	  cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1;
+	  *total = (COSTS_N_INSNS (cost)
+		    + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
+		    + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed));
+	  return true;
+	}
+
+      /* Fall through.  */
+
+    case IOR:
+    case XOR:
+      /* Double-word operations use two single-word operations.  */
+      *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
+				      speed);
+      return true;
+
+    case ASHIFT:
+    case ASHIFTRT:
+    case LSHIFTRT:
+    case ROTATE:
+    case ROTATERT:
+      if (CONSTANT_P (XEXP (x, 1)))
+	*total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
+					COSTS_N_INSNS (4), speed);
+      else
+	*total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
+					COSTS_N_INSNS (12), speed);
+      return true;
+
+    case ABS:
+      if (float_mode_p)
+	*total = loongarch_cost->fp_add;
+      else
+	*total = COSTS_N_INSNS (4);
+      return false;
+
+    case LT:
+    case LTU:
+    case LE:
+    case LEU:
+    case GT:
+    case GTU:
+    case GE:
+    case GEU:
+    case EQ:
+    case NE:
+    case UNORDERED:
+    case LTGT:
+    case UNGE:
+    case UNGT:
+    case UNLE:
+    case UNLT:
+      /* Branch comparisons have VOIDmode, so use the first operand's
+	 mode instead.  */
+      mode = GET_MODE (XEXP (x, 0));
+      if (FLOAT_MODE_P (mode))
+	{
+	  *total = loongarch_cost->fp_add;
+	  return false;
+	}
+      *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
+				      speed);
+      return true;
+
+    case MINUS:
+    case PLUS:
+      if (float_mode_p)
+	{
+	  *total = loongarch_cost->fp_add;
+	  return false;
+	}
+
+      /* If it's an add + mult (which is equivalent to shift left) and
+	 it's immediate operand satisfies const_immalsl_operand predicate.  */
+      if ((mode == SImode || (TARGET_64BIT && mode == DImode))
+	  && GET_CODE (XEXP (x, 0)) == MULT)
+	{
+	  rtx op2 = XEXP (XEXP (x, 0), 1);
+	  if (const_immalsl_operand (op2, mode))
+	    {
+	      *total = (COSTS_N_INSNS (1)
+			+ set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
+			+ set_src_cost (XEXP (x, 1), mode, speed));
+	      return true;
+	    }
+	}
+
+      /* Double-word operations require three single-word operations and
+	 an SLTU.  */
+      *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
+				      speed);
+      return true;
+
+    case NEG:
+      if (float_mode_p)
+	*total = loongarch_cost->fp_add;
+      else
+	*total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
+      return false;
+
+    case FMA:
+      *total = loongarch_fp_mult_cost (mode);
+      return false;
+
+    case MULT:
+      if (float_mode_p)
+	*total = loongarch_fp_mult_cost (mode);
+      else if (mode == DImode && !TARGET_64BIT)
+	*total = (speed
+		  ? loongarch_cost->int_mult_si * 3 + 6
+		  : COSTS_N_INSNS (7));
+      else if (!speed)
+	*total = COSTS_N_INSNS (1) + 1;
+      else if (mode == DImode)
+	*total = loongarch_cost->int_mult_di;
+      else
+	*total = loongarch_cost->int_mult_si;
+      return false;
+
+    case DIV:
+      /* Check for a reciprocal.  */
+      if (float_mode_p
+	  && flag_unsafe_math_optimizations
+	  && XEXP (x, 0) == CONST1_RTX (mode))
+	{
+	  if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT)
+	    /* An rsqrt<mode>a or rsqrt<mode>b pattern.  Count the
+	       division as being free.  */
+	    *total = set_src_cost (XEXP (x, 1), mode, speed);
+	  else
+	    *total = (loongarch_fp_div_cost (mode)
+		      + set_src_cost (XEXP (x, 1), mode, speed));
+	  return true;
+	}
+      /* Fall through.  */
+
+    case SQRT:
+    case MOD:
+      if (float_mode_p)
+	{
+	  *total = loongarch_fp_div_cost (mode);
+	  return false;
+	}
+      /* Fall through.  */
+
+    case UDIV:
+    case UMOD:
+      if (!speed)
+	{
+	  *total = COSTS_N_INSNS (loongarch_idiv_insns (mode));
+	}
+      else if (mode == DImode)
+	*total = loongarch_cost->int_div_di;
+      else
+	*total = loongarch_cost->int_div_si;
+      return false;
+
+    case SIGN_EXTEND:
+      *total = loongarch_sign_extend_cost (mode, XEXP (x, 0));
+      return false;
+
+    case ZERO_EXTEND:
+      *total = loongarch_zero_extend_cost (mode, XEXP (x, 0));
+      return false;
+    case TRUNCATE:
+      /* Costings for highpart multiplies.  Matching patterns of the form:
+
+	 (lshiftrt:DI (mult:DI (sign_extend:DI (...)
+			       (sign_extend:DI (...))
+		      (const_int 32)
+      */
+      if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT
+	   || GET_CODE (XEXP (x, 0)) == LSHIFTRT)
+	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+	  && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32
+	       && GET_MODE (XEXP (x, 0)) == DImode)
+	      || (TARGET_64BIT
+		  && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
+		  && GET_MODE (XEXP (x, 0)) == TImode))
+	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
+	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
+	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
+		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
+		      == ZERO_EXTEND))))
+	{
+	  if (!speed)
+	    *total = COSTS_N_INSNS (1) + 1;
+	  else if (mode == DImode)
+	    *total = loongarch_cost->int_mult_di;
+	  else
+	    *total = loongarch_cost->int_mult_si;
+
+	  /* Sign extension is free, zero extension costs for DImode when
+	     on a 64bit core / when DMUL is present.  */
+	  for (int i = 0; i < 2; ++i)
+	    {
+	      rtx op = XEXP (XEXP (XEXP (x, 0), 0), i);
+	      if (TARGET_64BIT
+		  && GET_CODE (op) == ZERO_EXTEND
+		  && GET_MODE (op) == DImode)
+		*total += rtx_cost (op, DImode, MULT, i, speed);
+	      else
+		*total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), 0,
+				    speed);
+	    }
+
+	  return true;
+	}
+      return false;
+
+    case FLOAT:
+    case UNSIGNED_FLOAT:
+    case FIX:
+    case FLOAT_EXTEND:
+    case FLOAT_TRUNCATE:
+      *total = loongarch_cost->fp_add;
+      return false;
+
+    case SET:
+      if (register_operand (SET_DEST (x), VOIDmode)
+	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
+	{
+	  *total = loongarch_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
+	  return true;
+	}
+      return false;
+
+    default:
+      return false;
+    }
+}
+
+/* Implement TARGET_ADDRESS_COST.  */
+
+static int
+loongarch_address_cost (rtx addr, machine_mode mode,
+			addr_space_t as ATTRIBUTE_UNUSED,
+			bool speed ATTRIBUTE_UNUSED)
+{
+  return loongarch_address_insns (addr, mode, false);
+}
+
+/* Return one word of double-word value OP, taking into account the fixed
+   endianness of certain registers.  HIGH_P is true to select the high part,
+   false to select the low part.  */
+
+rtx
+loongarch_subword (rtx op, bool high_p)
+{
+  unsigned int byte, offset;
+  machine_mode mode;
+
+  mode = GET_MODE (op);
+  if (mode == VOIDmode)
+    mode = TARGET_64BIT ? TImode : DImode;
+
+  if (high_p)
+    byte = UNITS_PER_WORD;
+  else
+    byte = 0;
+
+  if (FP_REG_RTX_P (op))
+    {
+      /* Paired FPRs are always ordered little-endian.  */
+      offset = (UNITS_PER_WORD < UNITS_PER_HWFPVALUE ? high_p : byte != 0);
+      return gen_rtx_REG (word_mode, REGNO (op) + offset);
+    }
+
+  if (MEM_P (op))
+    return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte));
+
+  return simplify_gen_subreg (word_mode, op, mode, byte);
+}
+
+/* Return true if a move from SRC to DEST should be split into two.
+   SPLIT_TYPE describes the split condition.  */
+
+bool
+loongarch_split_move_p (rtx dest, rtx src,
+			enum loongarch_split_type split_type ATTRIBUTE_UNUSED)
+{
+  /* FPR-to-FPR moves can be done in a single instruction, if they're
+     allowed at all.  */
+  unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
+  if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
+    return false;
+
+  /* Check for floating-point loads and stores.  */
+  if (size == 8)
+    {
+      if (FP_REG_RTX_P (dest) && MEM_P (src))
+	return false;
+      if (FP_REG_RTX_P (src) && MEM_P (dest))
+	return false;
+    }
+  /* Otherwise split all multiword moves.  */
+  return size > UNITS_PER_WORD;
+}
+
+/* Split a move from SRC to DEST, given that loongarch_split_move_p holds.
+   SPLIT_TYPE describes the split condition.  */
+
+void
+loongarch_split_move (rtx dest, rtx src, enum loongarch_split_type split_type,
+		      rtx insn_)
+{
+  rtx low_dest;
+
+  gcc_checking_assert (loongarch_split_move_p (dest, src, split_type));
+  if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
+    {
+      if (!TARGET_64BIT && GET_MODE (dest) == DImode)
+	emit_insn (gen_move_doubleword_fprdi (dest, src));
+      else if (!TARGET_64BIT && GET_MODE (dest) == DFmode)
+	emit_insn (gen_move_doubleword_fprdf (dest, src));
+      else if (TARGET_64BIT && GET_MODE (dest) == TFmode)
+	emit_insn (gen_move_doubleword_fprtf (dest, src));
+      else
+	gcc_unreachable ();
+    }
+  else
+    {
+      /* The operation can be split into two normal moves.  Decide in
+	 which order to do them.  */
+      low_dest = loongarch_subword (dest, false);
+      if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
+	{
+	  loongarch_emit_move (loongarch_subword (dest, true),
+			       loongarch_subword (src, true));
+	  loongarch_emit_move (low_dest, loongarch_subword (src, false));
+	}
+      else
+	{
+	  loongarch_emit_move (low_dest, loongarch_subword (src, false));
+	  loongarch_emit_move (loongarch_subword (dest, true),
+			       loongarch_subword (src, true));
+	}
+    }
+
+  /* This is a hack.  See if the next insn uses DEST and if so, see if we
+     can forward SRC for DEST.  This is most useful if the next insn is a
+     simple store.  */
+  rtx_insn *insn = (rtx_insn *) insn_;
+  struct loongarch_address_info addr = {};
+  if (insn)
+    {
+      rtx_insn *next = next_nonnote_nondebug_insn_bb (insn);
+      if (next)
+	{
+	  rtx set = single_set (next);
+	  if (set && SET_SRC (set) == dest)
+	    {
+	      if (MEM_P (src))
+		{
+		  rtx tmp = XEXP (src, 0);
+		  loongarch_classify_address (&addr, tmp, GET_MODE (tmp),
+					      true);
+		  if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg))
+		    validate_change (next, &SET_SRC (set), src, false);
+		}
+	      else
+		validate_change (next, &SET_SRC (set), src, false);
+	    }
+	}
+    }
+}
+
+/* Return the split type for instruction INSN.  */
+
+static enum loongarch_split_type
+loongarch_insn_split_type (rtx insn)
+{
+  basic_block bb = BLOCK_FOR_INSN (insn);
+  if (bb)
+    {
+      if (optimize_bb_for_speed_p (bb))
+	return SPLIT_FOR_SPEED;
+      else
+	return SPLIT_FOR_SIZE;
+    }
+  /* Once CFG information has been removed, we should trust the optimization
+     decisions made by previous passes and only split where necessary.  */
+  return SPLIT_IF_NECESSARY;
+}
+
+/* Return true if a move from SRC to DEST in INSN should be split.  */
+
+bool
+loongarch_split_move_insn_p (rtx dest, rtx src, rtx insn)
+{
+  return loongarch_split_move_p (dest, src, loongarch_insn_split_type (insn));
+}
+
+/* Split a move from SRC to DEST in INSN, given that
+   loongarch_split_move_insn_p holds.  */
+
+void
+loongarch_split_move_insn (rtx dest, rtx src, rtx insn)
+{
+  loongarch_split_move (dest, src, loongarch_insn_split_type (insn), insn);
+}
+
+/* Forward declaration.  Used below.  */
+static HOST_WIDE_INT
+loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align);
+
+const char *
+loongarch_output_move_index (rtx x, machine_mode mode, bool ldr)
+{
+  int index = exact_log2 (GET_MODE_SIZE (mode));
+  if (!IN_RANGE (index, 0, 3))
+    return NULL;
+
+  struct loongarch_address_info info;
+  if ((loongarch_classify_address (&info, x, mode, false)
+       && !(info.type == ADDRESS_REG_REG))
+      || !loongarch_legitimate_address_p (mode, x, false))
+    return NULL;
+
+  const char *const insn[][4] =
+    {
+      {
+	"stx.b\t%z1,%0",
+	"stx.h\t%z1,%0",
+	"stx.w\t%z1,%0",
+	"stx.d\t%z1,%0",
+      },
+      {
+	"ldx.bu\t%0,%1",
+	"ldx.hu\t%0,%1",
+	"ldx.w\t%0,%1",
+	"ldx.d\t%0,%1",
+      }
+    };
+
+  return insn[ldr][index];
+}
+
+const char *
+loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
+{
+  int index = exact_log2 (GET_MODE_SIZE (mode));
+  if (!IN_RANGE (index, 2, 3))
+    return NULL;
+
+  struct loongarch_address_info info;
+  if ((loongarch_classify_address (&info, x, mode, false)
+       && !(info.type == ADDRESS_REG_REG))
+      || !loongarch_legitimate_address_p (mode, x, false))
+    return NULL;
+
+  const char *const insn[][2] =
+    {
+	{
+	  "fstx.s\t%1,%0",
+	  "fstx.d\t%1,%0"
+	},
+	{
+	  "fldx.s\t%0,%1",
+	  "fldx.d\t%0,%1"
+	},
+    };
+
+  return insn[ldr][index-2];
+}
+/* Return the appropriate instructions to move SRC into DEST.  Assume
+   that SRC is operand 1 and DEST is operand 0.  */
+
+const char *
+loongarch_output_move (rtx dest, rtx src)
+{
+  enum rtx_code dest_code = GET_CODE (dest);
+  enum rtx_code src_code = GET_CODE (src);
+  machine_mode mode = GET_MODE (dest);
+  bool dbl_p = (GET_MODE_SIZE (mode) == 8);
+
+  if (loongarch_split_move_p (dest, src, SPLIT_IF_NECESSARY))
+    return "#";
+
+  if ((src_code == REG && GP_REG_P (REGNO (src)))
+      || (src == CONST0_RTX (mode)))
+    {
+      if (dest_code == REG)
+	{
+	  if (GP_REG_P (REGNO (dest)))
+	    return "or\t%0,%z1,$r0";
+
+	  if (FP_REG_P (REGNO (dest)))
+	    return dbl_p ? "movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1";
+	}
+      if (dest_code == MEM)
+	{
+	  const char *insn = NULL;
+	  insn = loongarch_output_move_index (XEXP (dest, 0), GET_MODE (dest),
+					      false);
+	  if (insn)
+	    return insn;
+
+	  rtx offset = XEXP (dest, 0);
+	  if (GET_CODE (offset) == PLUS)
+	    offset = XEXP (offset, 1);
+	  switch (GET_MODE_SIZE (mode))
+	    {
+	    case 1:
+	      return "st.b\t%z1,%0";
+	    case 2:
+	      return "st.h\t%z1,%0";
+	    case 4:
+	      if (const_arith_operand (offset, Pmode))
+		return "st.w\t%z1,%0";
+	      else
+		return "stptr.w\t%z1,%0";
+	    case 8:
+	      if (const_arith_operand (offset, Pmode))
+		return "st.d\t%z1,%0";
+	      else
+		return "stptr.d\t%z1,%0";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    }
+  if (dest_code == REG && GP_REG_P (REGNO (dest)))
+    {
+      if (src_code == REG)
+	if (FP_REG_P (REGNO (src)))
+	  return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1";
+
+      if (src_code == MEM)
+	{
+	  const char *insn = NULL;
+	  insn = loongarch_output_move_index (XEXP (src, 0), GET_MODE (src),
+					      true);
+	  if (insn)
+	    return insn;
+
+	  rtx offset = XEXP (src, 0);
+	  if (GET_CODE (offset) == PLUS)
+	    offset = XEXP (offset, 1);
+	  switch (GET_MODE_SIZE (mode))
+	    {
+	    case 1:
+	      return "ld.bu\t%0,%1";
+	    case 2:
+	      return "ld.hu\t%0,%1";
+	    case 4:
+	      if (const_arith_operand (offset, Pmode))
+		return "ld.w\t%0,%1";
+	      else
+		return "ldptr.w\t%0,%1";
+	    case 8:
+	      if (const_arith_operand (offset, Pmode))
+		return "ld.d\t%0,%1";
+	      else
+		return "ldptr.d\t%0,%1";
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+
+      if (src_code == CONST_INT)
+	{
+	  if (LU12I_INT (src))
+	    return "lu12i.w\t%0,%1>>12\t\t\t# %X1";
+	  else if (IMM12_INT (src))
+	    return "addi.w\t%0,$r0,%1\t\t\t# %X1";
+	  else if (IMM12_INT_UNSIGNED (src))
+	    return "ori\t%0,$r0,%1\t\t\t# %X1";
+	  else if (LU52I_INT (src))
+	    return "lu52i.d\t%0,$r0,%X1>>52\t\t\t# %1";
+	  else
+	    gcc_unreachable ();
+	}
+
+      if (symbolic_operand (src, VOIDmode))
+	{
+	  if ((TARGET_CMODEL_TINY && (!loongarch_global_symbol_p (src)
+				      || loongarch_symbol_binds_local_p (src)))
+	      || (TARGET_CMODEL_TINY_STATIC && !loongarch_weak_symbol_p (src)))
+	    {
+	      /* The symbol must be aligned to 4 byte.  */
+	      unsigned int align;
+
+	      if (GET_CODE (src) == LABEL_REF)
+		align = 128 /* Whatever.  */;
+	      else if (CONSTANT_POOL_ADDRESS_P (src))
+		align = GET_MODE_ALIGNMENT (get_pool_mode (src));
+	      else if (TREE_CONSTANT_POOL_ADDRESS_P (src))
+		{
+		  tree exp = SYMBOL_REF_DECL (src);
+		  align = TYPE_ALIGN (TREE_TYPE (exp));
+		  align = loongarch_constant_alignment (exp, align);
+		}
+	      else if (SYMBOL_REF_DECL (src))
+		align = DECL_ALIGN (SYMBOL_REF_DECL (src));
+	      else if (SYMBOL_REF_HAS_BLOCK_INFO_P (src)
+		       && SYMBOL_REF_BLOCK (src) != NULL)
+		align = SYMBOL_REF_BLOCK (src)->alignment;
+	      else
+		align = BITS_PER_UNIT;
+
+	      if (align % (4 * 8) == 0)
+		return "pcaddi\t%0,%%pcrel(%1)>>2";
+	    }
+	  if (TARGET_CMODEL_TINY
+	      || TARGET_CMODEL_TINY_STATIC
+	      || TARGET_CMODEL_NORMAL
+	      || TARGET_CMODEL_LARGE)
+	    {
+	      if (!loongarch_global_symbol_p (src)
+		  || loongarch_symbol_binds_local_p (src))
+		return "la.local\t%0,%1";
+	      else
+		return "la.global\t%0,%1";
+	    }
+	  if (TARGET_CMODEL_EXTREME)
+	    {
+	      sorry ("Normal symbol loading not implemented in extreme mode.");
+	      /* GCC complains.  */
+	      /* return ""; */
+	    }
+	}
+    }
+  if (src_code == REG && FP_REG_P (REGNO (src)))
+    {
+      if (dest_code == REG && FP_REG_P (REGNO (dest)))
+	return dbl_p ? "fmov.d\t%0,%1" : "fmov.s\t%0,%1";
+
+      if (dest_code == MEM)
+	{
+	  const char *insn = NULL;
+	  insn = loongarch_output_move_index_float (XEXP (dest, 0),
+						    GET_MODE (dest),
+						    false);
+	  if (insn)
+	    return insn;
+
+	  return dbl_p ? "fst.d\t%1,%0" : "fst.s\t%1,%0";
+	}
+    }
+  if (dest_code == REG && FP_REG_P (REGNO (dest)))
+    {
+      if (src_code == MEM)
+	{
+	  const char *insn = NULL;
+	  insn = loongarch_output_move_index_float (XEXP (src, 0),
+						    GET_MODE (src),
+						    true);
+	  if (insn)
+	    return insn;
+
+	  return dbl_p ? "fld.d\t%0,%1" : "fld.s\t%0,%1";
+	}
+    }
+  gcc_unreachable ();
+}
+
+/* Return true if CMP1 is a suitable second operand for integer ordering
+   test CODE.  */
+
+static bool
+loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
+{
+  switch (code)
+    {
+    case GT:
+    case GTU:
+      return reg_or_0_operand (cmp1, VOIDmode);
+
+    case GE:
+    case GEU:
+      return cmp1 == const1_rtx;
+
+    case LT:
+    case LTU:
+      return arith_operand (cmp1, VOIDmode);
+
+    case LE:
+      return sle_operand (cmp1, VOIDmode);
+
+    case LEU:
+      return sleu_operand (cmp1, VOIDmode);
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Return true if *CMP1 (of mode MODE) is a valid second operand for
+   integer ordering test *CODE, or if an equivalent combination can
+   be formed by adjusting *CODE and *CMP1.  When returning true, update
+   *CODE and *CMP1 with the chosen code and operand, otherwise leave
+   them alone.  */
+
+static bool
+loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
+				       machine_mode mode)
+{
+  HOST_WIDE_INT plus_one;
+
+  if (loongarch_int_order_operand_ok_p (*code, *cmp1))
+    return true;
+
+  if (CONST_INT_P (*cmp1))
+    switch (*code)
+      {
+      case LE:
+	plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
+	if (INTVAL (*cmp1) < plus_one)
+	  {
+	    *code = LT;
+	    *cmp1 = force_reg (mode, GEN_INT (plus_one));
+	    return true;
+	  }
+	break;
+
+      case LEU:
+	plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
+	if (plus_one != 0)
+	  {
+	    *code = LTU;
+	    *cmp1 = force_reg (mode, GEN_INT (plus_one));
+	    return true;
+	  }
+	break;
+
+      default:
+	break;
+      }
+  return false;
+}
+
+/* Compare CMP0 and CMP1 using ordering test CODE and store the result
+   in TARGET.  CMP0 and TARGET are register_operands.  If INVERT_PTR
+   is nonnull, it's OK to set TARGET to the inverse of the result and
+   flip *INVERT_PTR instead.  */
+
+static void
+loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
+			       rtx target, rtx cmp0, rtx cmp1)
+{
+  machine_mode mode;
+
+  /* First see if there is a LoongArch instruction that can do this operation.
+     If not, try doing the same for the inverse operation.  If that also
+     fails, force CMP1 into a register and try again.  */
+  mode = GET_MODE (cmp0);
+  if (loongarch_canonicalize_int_order_test (&code, &cmp1, mode))
+    loongarch_emit_binary (code, target, cmp0, cmp1);
+  else
+    {
+      enum rtx_code inv_code = reverse_condition (code);
+      if (!loongarch_canonicalize_int_order_test (&inv_code, &cmp1, mode))
+	{
+	  cmp1 = force_reg (mode, cmp1);
+	  loongarch_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
+	}
+      else if (invert_ptr == 0)
+	{
+	  rtx inv_target;
+
+	  inv_target = loongarch_force_binary (GET_MODE (target),
+					       inv_code, cmp0, cmp1);
+	  loongarch_emit_binary (XOR, target, inv_target, const1_rtx);
+	}
+      else
+	{
+	  *invert_ptr = !*invert_ptr;
+	  loongarch_emit_binary (inv_code, target, cmp0, cmp1);
+	}
+    }
+}
+
+/* Return a register that is zero if CMP0 and CMP1 are equal.
+   The register will have the same mode as CMP0.  */
+
+static rtx
+loongarch_zero_if_equal (rtx cmp0, rtx cmp1)
+{
+  if (cmp1 == const0_rtx)
+    return cmp0;
+
+  if (uns_arith_operand (cmp1, VOIDmode))
+    return expand_binop (GET_MODE (cmp0), xor_optab, cmp0, cmp1, 0, 0,
+			 OPTAB_DIRECT);
+
+  return expand_binop (GET_MODE (cmp0), sub_optab, cmp0, cmp1, 0, 0,
+		       OPTAB_DIRECT);
+}
+
+/* Allocate a floating-point condition-code register of mode MODE.  */
+
+static rtx
+loongarch_allocate_fcc (machine_mode mode)
+{
+  unsigned int regno, count;
+
+  gcc_assert (TARGET_HARD_FLOAT);
+
+  if (mode == FCCmode)
+    count = 1;
+  else
+    gcc_unreachable ();
+
+  cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1);
+  if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST)
+    cfun->machine->next_fcc = 0;
+
+  regno = FCC_REG_FIRST + cfun->machine->next_fcc;
+  cfun->machine->next_fcc += count;
+  return gen_rtx_REG (mode, regno);
+}
+
+/* Sign- or zero-extend OP0 and OP1 for integer comparisons.  */
+
+static void
+loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
+{
+  /* Comparisons consider all XLEN bits, so extend sub-XLEN values.  */
+  if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
+    {
+      /* TODO: checkout It is more profitable to zero-extend QImode values.  */
+      if (unsigned_condition (code) == code && GET_MODE (*op0) == QImode)
+	{
+	  *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
+	  if (CONST_INT_P (*op1))
+	    *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
+	  else
+	    *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
+	}
+      else
+	{
+	  *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0);
+	  if (*op1 != const0_rtx)
+	    *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1);
+	}
+    }
+}
+
+/* Convert a comparison into something that can be used in a branch.  On
+   entry, *OP0 and *OP1 are the values being compared and *CODE is the code
+   used to compare them.  Update them to describe the final comparison.  */
+
+static void
+loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1)
+{
+  static const enum rtx_code
+  mag_comparisons[][2] = {{LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}};
+
+  if (splittable_const_int_operand (*op1, VOIDmode))
+    {
+      HOST_WIDE_INT rhs = INTVAL (*op1);
+
+      if (*code == EQ || *code == NE)
+	{
+	  /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0.  */
+	  if (IMM12_OPERAND (-rhs))
+	    {
+	      *op0 = loongarch_force_binary (GET_MODE (*op0), PLUS, *op0,
+					     GEN_INT (-rhs));
+	      *op1 = const0_rtx;
+	    }
+	}
+      else
+	{
+	  /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000).  */
+	  for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
+	    {
+	      HOST_WIDE_INT new_rhs;
+	      bool increment = *code == mag_comparisons[i][0];
+	      bool decrement = *code == mag_comparisons[i][1];
+	      if (!increment && !decrement)
+		continue;
+
+	      new_rhs = rhs + (increment ? 1 : -1);
+	      if (loongarch_integer_cost (new_rhs)
+		    < loongarch_integer_cost (rhs)
+		  && (rhs < 0) == (new_rhs < 0))
+		{
+		  *op1 = GEN_INT (new_rhs);
+		  *code = mag_comparisons[i][increment];
+		}
+	      break;
+	    }
+	}
+    }
+
+  loongarch_extend_comparands (*code, op0, op1);
+
+  *op0 = force_reg (word_mode, *op0);
+  if (*op1 != const0_rtx)
+    *op1 = force_reg (word_mode, *op1);
+}
+
+/* Like riscv_emit_int_compare, but for floating-point comparisons.  */
+
+static void
+loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
+{
+  rtx cmp_op0 = *op0;
+  rtx cmp_op1 = *op1;
+
+  /* Floating-point tests use a separate FCMP.cond.fmt
+     comparison to set a register.  The branch or conditional move will
+     then compare that register against zero.
+
+     Set CMP_CODE to the code of the comparison instruction and
+     *CODE to the code that the branch or move should use.  */
+  enum rtx_code cmp_code = *code;
+  /* Three FP conditions cannot be implemented by reversing the
+     operands for FCMP.cond.fmt, instead a reversed condition code is
+     required and a test for false.  */
+  *code = NE;
+  *op0 = loongarch_allocate_fcc (FCCmode);
+
+  *op1 = const0_rtx;
+  loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1);
+}
+
+/* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2]
+   and OPERAND[3].  Store the result in OPERANDS[0].
+
+   On 64-bit targets, the mode of the comparison and target will always be
+   SImode, thus possibly narrower than that of the comparison's operands.  */
+
+void
+loongarch_expand_scc (rtx operands[])
+{
+  rtx target = operands[0];
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx op0 = operands[2];
+  rtx op1 = operands[3];
+
+  gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT);
+
+  if (code == EQ || code == NE)
+    {
+      rtx zie = loongarch_zero_if_equal (op0, op1);
+      loongarch_emit_binary (code, target, zie, const0_rtx);
+    }
+  else
+    loongarch_emit_int_order_test (code, 0, target, op0, op1);
+}
+
+/* Compare OPERANDS[1] with OPERANDS[2] using comparison code
+   CODE and jump to OPERANDS[3] if the condition holds.  */
+
+void
+loongarch_expand_conditional_branch (rtx *operands)
+{
+  enum rtx_code code = GET_CODE (operands[0]);
+  rtx op0 = operands[1];
+  rtx op1 = operands[2];
+  rtx condition;
+
+  if (FLOAT_MODE_P (GET_MODE (op1)))
+    loongarch_emit_float_compare (&code, &op0, &op1);
+  else
+    loongarch_emit_int_compare (&code, &op0, &op1);
+
+  condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
+  emit_jump_insn (gen_condjump (condition, operands[3]));
+}
+
+/* Perform the comparison in OPERANDS[1].  Move OPERANDS[2] into OPERANDS[0]
+   if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0].  */
+
+void
+loongarch_expand_conditional_move (rtx *operands)
+{
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx op0 = XEXP (operands[1], 0);
+  rtx op1 = XEXP (operands[1], 1);
+
+  if (FLOAT_MODE_P (GET_MODE (op1)))
+    loongarch_emit_float_compare (&code, &op0, &op1);
+  else
+    {
+      loongarch_extend_comparands (code, &op0, &op1);
+
+      op0 = force_reg (word_mode, op0);
+
+      if (code == EQ || code == NE)
+	{
+	  op0 = loongarch_zero_if_equal (op0, op1);
+	  op1 = const0_rtx;
+	}
+      else
+	{
+	  /* The comparison needs a separate scc instruction.  Store the
+	     result of the scc in *OP0 and compare it against zero.  */
+	  bool invert = false;
+	  rtx target = gen_reg_rtx (GET_MODE (op0));
+	  loongarch_emit_int_order_test (code, &invert, target, op0, op1);
+	  code = invert ? EQ : NE;
+	  op0 = target;
+	  op1 = const0_rtx;
+	}
+    }
+
+  rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
+  /* There is no direct support for general conditional GP move involving
+     two registers using SEL.  */
+  if (INTEGRAL_MODE_P (GET_MODE (operands[2]))
+      && register_operand (operands[2], VOIDmode)
+      && register_operand (operands[3], VOIDmode))
+    {
+      machine_mode mode = GET_MODE (operands[0]);
+      rtx temp = gen_reg_rtx (mode);
+      rtx temp2 = gen_reg_rtx (mode);
+
+      emit_insn (gen_rtx_SET (temp,
+			      gen_rtx_IF_THEN_ELSE (mode, cond,
+						    operands[2], const0_rtx)));
+
+      /* Flip the test for the second operand.  */
+      cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
+
+      emit_insn (gen_rtx_SET (temp2,
+			      gen_rtx_IF_THEN_ELSE (mode, cond,
+						    operands[3], const0_rtx)));
+
+      /* Merge the two results, at least one is guaranteed to be zero.  */
+      emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+    }
+  else
+    emit_insn (gen_rtx_SET (operands[0],
+			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond,
+						  operands[2], operands[3])));
+}
+
+/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
+
+static void
+loongarch_va_start (tree valist, rtx nextarg)
+{
+  nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
+  std_expand_builtin_va_start (valist, nextarg);
+}
+
+/* Start a definition of function NAME.  */
+
+static void
+loongarch_start_function_definition (const char *name)
+{
+  ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, name, "function");
+
+  /* Start the definition proper.  */
+  assemble_name (asm_out_file, name);
+  fputs (":\n", asm_out_file);
+}
+
+/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */
+
+static bool
+loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
+				   tree exp ATTRIBUTE_UNUSED)
+{
+  /* Always OK.  */
+  return true;
+}
+
+/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+   Assume that the areas do not overlap.  */
+
+static void
+loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
+{
+  HOST_WIDE_INT offset, delta;
+  unsigned HOST_WIDE_INT bits;
+  int i;
+  machine_mode mode;
+  rtx *regs;
+
+  bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)));
+
+  mode = int_mode_for_size (bits, 0).require ();
+  delta = bits / BITS_PER_UNIT;
+
+  /* Allocate a buffer for the temporary registers.  */
+  regs = XALLOCAVEC (rtx, length / delta);
+
+  /* Load as many BITS-sized chunks as possible.  Use a normal load if
+     the source has enough alignment, otherwise use left/right pairs.  */
+  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+    {
+      regs[i] = gen_reg_rtx (mode);
+      loongarch_emit_move (regs[i], adjust_address (src, mode, offset));
+    }
+
+  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+    loongarch_emit_move (adjust_address (dest, mode, offset), regs[i]);
+
+  /* Mop up any left-over bytes.  */
+  if (offset < length)
+    {
+      src = adjust_address (src, BLKmode, offset);
+      dest = adjust_address (dest, BLKmode, offset);
+      move_by_pieces (dest, src, length - offset,
+		      MIN (MEM_ALIGN (src), MEM_ALIGN (dest)),
+		      (enum memop_ret) 0);
+    }
+}
+
+/* Helper function for doing a loop-based block operation on memory
+   reference MEM.  Each iteration of the loop will operate on LENGTH
+   bytes of MEM.
+
+   Create a new base register for use within the loop and point it to
+   the start of MEM.  Create a new memory reference that uses this
+   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
+
+static void
+loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
+			    rtx *loop_mem)
+{
+  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
+
+  /* Although the new mem does not refer to a known location,
+     it does keep up to LENGTH bytes of alignment.  */
+  *loop_mem = change_address (mem, BLKmode, *loop_reg);
+  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+}
+
+/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
+   the memory regions do not overlap.  */
+
+static void
+loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+			   HOST_WIDE_INT bytes_per_iter)
+{
+  rtx_code_label *label;
+  rtx src_reg, dest_reg, final_src, test;
+  HOST_WIDE_INT leftover;
+
+  leftover = length % bytes_per_iter;
+  length -= leftover;
+
+  /* Create registers and memory references for use within the loop.  */
+  loongarch_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+  loongarch_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+
+  /* Calculate the value that SRC_REG should have after the last iteration
+     of the loop.  */
+  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), 0,
+				   0, OPTAB_WIDEN);
+
+  /* Emit the start of the loop.  */
+  label = gen_label_rtx ();
+  emit_label (label);
+
+  /* Emit the loop body.  */
+  loongarch_block_move_straight (dest, src, bytes_per_iter);
+
+  /* Move on to the next block.  */
+  loongarch_emit_move (src_reg,
+		       plus_constant (Pmode, src_reg, bytes_per_iter));
+  loongarch_emit_move (dest_reg,
+		       plus_constant (Pmode, dest_reg, bytes_per_iter));
+
+  /* Emit the loop condition.  */
+  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
+  if (Pmode == DImode)
+    emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
+  else
+    emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+
+  /* Mop up any left-over bytes.  */
+  if (leftover)
+    loongarch_block_move_straight (dest, src, leftover);
+  else
+    /* Temporary fix for PR79150.  */
+    emit_insn (gen_nop ());
+}
+
+/* Expand a cpymemsi instruction, which copies LENGTH bytes from
+   memory reference SRC to memory reference DEST.  */
+
+bool
+loongarch_expand_block_move (rtx dest, rtx src, rtx length)
+{
+  int max_move_bytes = LARCH_MAX_MOVE_BYTES_STRAIGHT;
+
+  if (CONST_INT_P (length)
+      && INTVAL (length) <= loongarch_max_inline_memcpy_size)
+    {
+      if (INTVAL (length) <= max_move_bytes)
+	{
+	  loongarch_block_move_straight (dest, src, INTVAL (length));
+	  return true;
+	}
+      else if (optimize)
+	{
+	  loongarch_block_move_loop (dest, src, INTVAL (length),
+				     LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER);
+	  return true;
+	}
+    }
+  return false;
+}
+
+/* Expand a QI or HI mode atomic memory operation.
+
+   GENERATOR contains a pointer to the gen_* function that generates
+   the SI mode underlying atomic operation using masks that we
+   calculate.
+
+   RESULT is the return register for the operation.  Its value is NULL
+   if unused.
+
+   MEM is the location of the atomic access.
+
+   OLDVAL is the first operand for the operation.
+
+   NEWVAL is the optional second operand for the operation.  Its value
+   is NULL if unused.  */
+
+void
+loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator,
+			      rtx result, rtx mem, rtx oldval, rtx newval,
+			      rtx model)
+{
+  rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask;
+  rtx unshifted_mask_reg, mask, inverted_mask, si_op;
+  rtx res = NULL;
+  machine_mode mode;
+
+  mode = GET_MODE (mem);
+
+  /* Compute the address of the containing SImode value.  */
+  orig_addr = force_reg (Pmode, XEXP (mem, 0));
+  memsi_addr = loongarch_force_binary (Pmode, AND, orig_addr,
+				       force_reg (Pmode, GEN_INT (-4)));
+
+  /* Create a memory reference for it.  */
+  memsi = gen_rtx_MEM (SImode, memsi_addr);
+  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
+  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
+
+  /* Work out the byte offset of the QImode or HImode value,
+     counting from the least significant byte.  */
+  shift = loongarch_force_binary (Pmode, AND, orig_addr, GEN_INT (3));
+  /* Multiply by eight to convert the shift value from bytes to bits.  */
+  loongarch_emit_binary (ASHIFT, shift, shift, GEN_INT (3));
+
+  /* Make the final shift an SImode value, so that it can be used in
+     SImode operations.  */
+  shiftsi = force_reg (SImode, gen_lowpart (SImode, shift));
+
+  /* Set MASK to an inclusive mask of the QImode or HImode value.  */
+  unshifted_mask = GEN_INT (GET_MODE_MASK (mode));
+  unshifted_mask_reg = force_reg (SImode, unshifted_mask);
+  mask = loongarch_force_binary (SImode, ASHIFT, unshifted_mask_reg, shiftsi);
+
+  /* Compute the equivalent exclusive mask.  */
+  inverted_mask = gen_reg_rtx (SImode);
+  emit_insn (gen_rtx_SET (inverted_mask, gen_rtx_NOT (SImode, mask)));
+
+  /* Shift the old value into place.  */
+  if (oldval != const0_rtx)
+    {
+      oldval = convert_modes (SImode, mode, oldval, true);
+      oldval = force_reg (SImode, oldval);
+      oldval = loongarch_force_binary (SImode, ASHIFT, oldval, shiftsi);
+    }
+
+  /* Do the same for the new value.  */
+  if (newval && newval != const0_rtx)
+    {
+      newval = convert_modes (SImode, mode, newval, true);
+      newval = force_reg (SImode, newval);
+      newval = loongarch_force_binary (SImode, ASHIFT, newval, shiftsi);
+    }
+
+  /* Do the SImode atomic access.  */
+  if (result)
+    res = gen_reg_rtx (SImode);
+
+  if (newval)
+    si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval,
+			    model);
+  else if (result)
+    si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, model);
+  else
+    si_op = generator.fn_5 (memsi, mask, inverted_mask, oldval, model);
+
+  emit_insn (si_op);
+
+  if (result)
+    {
+      /* Shift and convert the result.  */
+      loongarch_emit_binary (AND, res, res, mask);
+      loongarch_emit_binary (LSHIFTRT, res, res, shiftsi);
+      loongarch_emit_move (result, gen_lowpart (GET_MODE (result), res));
+    }
+}
+
+/* Return true if (zero_extract OP WIDTH BITPOS) can be used as the
+   source of an "ext" instruction or the destination of an "ins"
+   instruction.  OP must be a register operand and the following
+   conditions must hold:
+
+   0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op))
+   0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
+   0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
+
+   Also reject lengths equal to a word as they are better handled
+   by the move patterns.  */
+
+bool
+loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos)
+{
+  if (!register_operand (op, VOIDmode)
+      || GET_MODE_BITSIZE (GET_MODE (op)) > BITS_PER_WORD)
+    return false;
+
+  if (!IN_RANGE (width, 1, GET_MODE_BITSIZE (GET_MODE (op)) - 1))
+    return false;
+
+  if (bitpos < 0 || bitpos + width > GET_MODE_BITSIZE (GET_MODE (op)))
+    return false;
+
+  return true;
+}
+
+/* Print the text for PRINT_OPERAND punctation character CH to FILE.
+   The punctuation characters are:
+
+   '.'	Print the name of the register with a hard-wired zero (zero or $r0).
+   '$'	Print the name of the stack pointer register (sp or $r3).
+
+   See also loongarch_init_print_operand_punct.  */
+
+static void
+loongarch_print_operand_punctuation (FILE *file, int ch)
+{
+  switch (ch)
+    {
+    case '.':
+      fputs (reg_names[GP_REG_FIRST + 0], file);
+      break;
+
+    case '$':
+      fputs (reg_names[STACK_POINTER_REGNUM], file);
+      break;
+
+    default:
+      gcc_unreachable ();
+      break;
+    }
+}
+
+/* Initialize loongarch_print_operand_punct.  */
+
+static void
+loongarch_init_print_operand_punct (void)
+{
+  const char *p;
+
+  for (p = ".$"; *p; p++)
+    loongarch_print_operand_punct[(unsigned char) *p] = true;
+}
+
+/* PRINT_OPERAND prefix LETTER refers to the integer branch instruction
+   associated with condition CODE.  Print the condition part of the
+   opcode to FILE.  */
+
+static void
+loongarch_print_int_branch_condition (FILE *file, enum rtx_code code,
+				      int letter)
+{
+  switch (code)
+    {
+    case EQ:
+    case NE:
+    case GT:
+    case GE:
+    case LT:
+    case LE:
+    case GTU:
+    case GEU:
+    case LTU:
+    case LEU:
+      /* Conveniently, the LoongArch names for these conditions are the same
+	 as their RTL equivalents.  */
+      fputs (GET_RTX_NAME (code), file);
+      break;
+
+    default:
+      output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
+      break;
+    }
+}
+
+/* Likewise floating-point branches.  */
+
+static void
+loongarch_print_float_branch_condition (FILE *file, enum rtx_code code,
+					int letter)
+{
+  switch (code)
+    {
+    case EQ:
+      fputs ("ceqz", file);
+      break;
+
+    case NE:
+      fputs ("cnez", file);
+      break;
+
+    default:
+      output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
+      break;
+    }
+}
+
+/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
+
+static bool
+loongarch_print_operand_punct_valid_p (unsigned char code)
+{
+  return loongarch_print_operand_punct[code];
+}
+
+/* Return true if a FENCE should be emitted to before a memory access to
+   implement the release portion of memory model MODEL.  */
+
+static bool
+loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
+{
+  switch (model)
+    {
+      case MEMMODEL_ACQ_REL:
+      case MEMMODEL_SEQ_CST:
+      case MEMMODEL_SYNC_SEQ_CST:
+      case MEMMODEL_RELEASE:
+      case MEMMODEL_SYNC_RELEASE:
+      case MEMMODEL_ACQUIRE:
+      case MEMMODEL_CONSUME:
+      case MEMMODEL_SYNC_ACQUIRE:
+	return true;
+
+      case MEMMODEL_RELAXED:
+	return false;
+
+      default:
+	gcc_unreachable ();
+    }
+}
+
+/* Return true if a FENCE should be emitted to before a memory access to
+   implement the release portion of memory model MODEL.  */
+
+static bool
+loongarch_memmodel_needs_release_fence (enum memmodel model)
+{
+  switch (model)
+    {
+    case MEMMODEL_ACQ_REL:
+    case MEMMODEL_SEQ_CST:
+    case MEMMODEL_SYNC_SEQ_CST:
+    case MEMMODEL_RELEASE:
+    case MEMMODEL_SYNC_RELEASE:
+      return true;
+
+    case MEMMODEL_ACQUIRE:
+    case MEMMODEL_CONSUME:
+    case MEMMODEL_SYNC_ACQUIRE:
+    case MEMMODEL_RELAXED:
+      return false;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Implement TARGET_PRINT_OPERAND.  The LoongArch-specific operand codes are:
+
+   'X'	Print CONST_INT OP in hexadecimal format.
+   'x'	Print the low 16 bits of CONST_INT OP in hexadecimal format.
+   'd'	Print CONST_INT OP in decimal.
+   'm'	Print one less than CONST_INT OP in decimal.
+   'y'	Print exact log2 of CONST_INT OP in decimal.
+   'C'	Print the integer branch condition for comparison OP.
+   'N'	Print the inverse of the integer branch condition for comparison OP.
+   'F'	Print the FPU branch condition for comparison OP.
+   'W'	Print the inverse of the FPU branch condition for comparison OP.
+   'T'	Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+	      'z' for (eq:?I ...), 'n' for (ne:?I ...).
+   't'	Like 'T', but with the EQ/NE cases reversed
+   'Y'	Print loongarch_fp_conditions[INTVAL (OP)]
+   'Z'	Print OP and a comma for 8CC, otherwise print nothing.
+   'z'	Print $0 if OP is zero, otherwise print OP normally.
+   'b'	Print the address of a memory operand, without offset.
+   'V'	Print exact log2 of CONST_INT OP element 0 of a replicated
+	  CONST_VECTOR in decimal.
+   'A'	Print a _DB suffix if the memory model requires a release.
+   'G'	Print a DBAR insn if the memory model requires a release.
+   'i'	Print i if the operand is not a register.  */
+
+static void
+loongarch_print_operand (FILE *file, rtx op, int letter)
+{
+  enum rtx_code code;
+
+  if (loongarch_print_operand_punct_valid_p (letter))
+    {
+      loongarch_print_operand_punctuation (file, letter);
+      return;
+    }
+
+  gcc_assert (op);
+  code = GET_CODE (op);
+
+  switch (letter)
+    {
+    case 'X':
+      if (CONST_INT_P (op))
+	fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op));
+      else
+	output_operand_lossage ("invalid use of '%%%c'", letter);
+      break;
+
+    case 'x':
+      if (CONST_INT_P (op))
+	fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op) & 0xffff);
+      else
+	output_operand_lossage ("invalid use of '%%%c'", letter);
+      break;
+
+    case 'd':
+      if (CONST_INT_P (op))
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
+      else
+	output_operand_lossage ("invalid use of '%%%c'", letter);
+      break;
+
+    case 'm':
+      if (CONST_INT_P (op))
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1);
+      else
+	output_operand_lossage ("invalid use of '%%%c'", letter);
+      break;
+
+    case 'y':
+      if (CONST_INT_P (op))
+	{
+	  int val = exact_log2 (INTVAL (op));
+	  if (val != -1)
+	    fprintf (file, "%d", val);
+	  else
+	    output_operand_lossage ("invalid use of '%%%c'", letter);
+	}
+      else
+	output_operand_lossage ("invalid use of '%%%c'", letter);
+      break;
+
+    case 'V':
+      if (GET_CODE (op) == CONST_VECTOR)
+	{
+	  machine_mode mode = GET_MODE_INNER (GET_MODE (op));
+	  unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+	  int vlog2 = exact_log2 (val & GET_MODE_MASK (mode));
+	  if (vlog2 != -1)
+	    fprintf (file, "%d", vlog2);
+	  else
+	    output_operand_lossage ("invalid use of '%%%c'", letter);
+	}
+      else
+	output_operand_lossage ("invalid use of '%%%c'", letter);
+      break;
+
+    case 'C':
+      loongarch_print_int_branch_condition (file, code, letter);
+      break;
+
+    case 'N':
+      loongarch_print_int_branch_condition (file, reverse_condition (code),
+					    letter);
+      break;
+
+    case 'F':
+      loongarch_print_float_branch_condition (file, code, letter);
+      break;
+
+    case 'W':
+      loongarch_print_float_branch_condition (file, reverse_condition (code),
+					      letter);
+      break;
+
+    case 'T':
+    case 't':
+      {
+	int truth = (code == NE) == (letter == 'T');
+	fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file);
+      }
+      break;
+
+    case 'Y':
+      if (code == CONST_INT
+	  && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions))
+	fputs (loongarch_fp_conditions[UINTVAL (op)], file);
+      else
+	output_operand_lossage ("'%%%c' is not a valid operand prefix",
+				letter);
+      break;
+
+    case 'Z':
+      loongarch_print_operand (file, op, 0);
+      fputc (',', file);
+      break;
+
+    case 'A':
+      if (loongarch_memmodel_needs_rel_acq_fence ((enum memmodel) INTVAL (op)))
+	fputs ("_db", file);
+      break;
+
+    case 'G':
+      if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
+	fputs ("dbar\t0", file);
+      break;
+
+    case 'i':
+      if (code != REG)
+	fputs ("i", file);
+      break;
+
+    default:
+      switch (code)
+	{
+	case REG:
+	  {
+	    unsigned int regno = REGNO (op);
+	    if (letter && letter != 'z')
+	      output_operand_lossage ("invalid use of '%%%c'", letter);
+	    fprintf (file, "%s", reg_names[regno]);
+	  }
+	  break;
+
+	case MEM:
+	  if (letter == 'D')
+	    output_address (GET_MODE (op),
+			    plus_constant (Pmode, XEXP (op, 0), 4));
+	  else if (letter == 'b')
+	    {
+	      gcc_assert (REG_P (XEXP (op, 0)));
+	      loongarch_print_operand (file, XEXP (op, 0), 0);
+	    }
+	  else if (letter && letter != 'z')
+	    output_operand_lossage ("invalid use of '%%%c'", letter);
+	  else
+	    output_address (GET_MODE (op), XEXP (op, 0));
+	  break;
+
+	default:
+	  if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
+	    fputs (reg_names[GP_REG_FIRST], file);
+	  else if (letter && letter != 'z')
+	    output_operand_lossage ("invalid use of '%%%c'", letter);
+	  else
+	    output_addr_const (file, loongarch_strip_unspec_address (op));
+	  break;
+	}
+    }
+}
+
+/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
+
+static void
+loongarch_print_operand_address (FILE *file, machine_mode /* mode  */, rtx x)
+{
+  struct loongarch_address_info addr;
+
+  if (loongarch_classify_address (&addr, x, word_mode, true))
+    switch (addr.type)
+      {
+      case ADDRESS_REG:
+	fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
+	loongarch_print_operand (file, addr.offset, 0);
+	return;
+
+      case ADDRESS_REG_REG:
+	fprintf (file, "%s,%s", reg_names[REGNO (addr.reg)],
+				reg_names[REGNO (addr.offset)]);
+	return;
+
+      case ADDRESS_CONST_INT:
+	fprintf (file, "%s,", reg_names[GP_REG_FIRST]);
+	output_addr_const (file, x);
+	return;
+
+      case ADDRESS_SYMBOLIC:
+	output_addr_const (file, loongarch_strip_unspec_address (x));
+	return;
+      }
+  if (GET_CODE (x) == CONST_INT)
+    output_addr_const (file, x);
+  else
+    gcc_unreachable ();
+}
+
+/* Implement TARGET_ASM_SELECT_RTX_SECTION.  */
+
+static section *
+loongarch_select_rtx_section (machine_mode mode, rtx x,
+			      unsigned HOST_WIDE_INT align)
+{
+  /* ??? Consider using mergeable small data sections.  */
+  if (loongarch_rtx_constant_in_small_data_p (mode))
+    return get_named_section (NULL, ".sdata", 0);
+
+  return default_elf_select_rtx_section (mode, x, align);
+}
+
+/* Implement TARGET_ASM_FUNCTION_RODATA_SECTION.
+
+   The complication here is that, with the combination
+   !TARGET_ABSOLUTE_ABICALLS , jump tables will use
+   absolute addresses, and should therefore not be included in the
+   read-only part of a DSO.  Handle such cases by selecting a normal
+   data section instead of a read-only one.  The logic apes that in
+   default_function_rodata_section.  */
+
+static section *
+loongarch_function_rodata_section (tree decl, bool)
+{
+  return default_function_rodata_section (decl, false);
+}
+
+/* Implement TARGET_IN_SMALL_DATA_P.  */
+
+static bool
+loongarch_in_small_data_p (const_tree decl)
+{
+  int size;
+
+  if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
+    return false;
+
+  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
+    {
+      const char *name;
+
+      /* Reject anything that isn't in a known small-data section.  */
+      name = DECL_SECTION_NAME (decl);
+      if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
+	return false;
+
+      /* If a symbol is defined externally, the assembler will use the
+	 usual -G rules when deciding how to implement macros.  */
+      if (!DECL_EXTERNAL (decl))
+	return true;
+    }
+
+  /* We have traditionally not treated zero-sized objects as small data,
+     so this is now effectively part of the ABI.  */
+  size = int_size_in_bytes (TREE_TYPE (decl));
+  return size > 0 && size <= g_switch_value;
+}
+
+/* Implement TARGET_USE_ANCHORS_FOR_SYMBOL_P.  We don't want to use
+   anchors for small data: the GP register acts as an anchor in that
+   case.  We also don't want to use them for PC-relative accesses,
+   where the PC acts as an anchor.  */
+
+static bool
+loongarch_use_anchors_for_symbol_p (const_rtx symbol)
+{
+  return default_use_anchors_for_symbol_p (symbol);
+}
+
+/* The LoongArch debug format wants all automatic variables and arguments
+   to be in terms of the virtual frame pointer (stack pointer before
+   any adjustment in the function), while the LoongArch linker wants
+   the frame pointer to be the stack pointer after the initial
+   adjustment.  So, we do the adjustment here.  The arg pointer (which
+   is eliminated) points to the virtual frame pointer, while the frame
+   pointer (which may be eliminated) points to the stack pointer after
+   the initial adjustments.  */
+
+HOST_WIDE_INT
+loongarch_debugger_offset (rtx addr, HOST_WIDE_INT offset)
+{
+  rtx offset2 = const0_rtx;
+  rtx reg = eliminate_constant_term (addr, &offset2);
+
+  if (offset == 0)
+    offset = INTVAL (offset2);
+
+  if (reg == stack_pointer_rtx
+      || reg == frame_pointer_rtx
+      || reg == hard_frame_pointer_rtx)
+    {
+      offset -= cfun->machine->frame.total_size;
+      if (reg == hard_frame_pointer_rtx)
+	offset += cfun->machine->frame.hard_frame_pointer_offset;
+    }
+
+  return offset;
+}
+
+/* Implement ASM_OUTPUT_EXTERNAL.  */
+
+void
+loongarch_output_external (FILE *file, tree decl, const char *name)
+{
+  default_elf_asm_output_external (file, decl, name);
+
+  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
+     set in order to avoid putting out names that are never really
+     used.  */
+  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
+    {
+      if (loongarch_in_small_data_p (decl))
+	{
+	  /* When using assembler macros, emit .extern directives for
+	     all small-data externs so that the assembler knows how
+	     big they are.
+
+	     In most cases it would be safe (though pointless) to emit
+	     .externs for other symbols too.  One exception is when an
+	     object is within the -G limit but declared by the user to
+	     be in a section other than .sbss or .sdata.  */
+	  fputs ("\t.extern\t", file);
+	  assemble_name (file, name);
+	  fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC "\n",
+		   int_size_in_bytes (TREE_TYPE (decl)));
+	}
+    }
+}
+
+/* Implement TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
+
+static void ATTRIBUTE_UNUSED
+loongarch_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+  switch (size)
+    {
+    case 4:
+      fputs ("\t.dtprelword\t", file);
+      break;
+
+    case 8:
+      fputs ("\t.dtpreldword\t", file);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+  output_addr_const (file, x);
+  fputs ("+0x8000", file);
+}
+
+/* Implement TARGET_DWARF_FRAME_REG_MODE.  */
+
+static machine_mode
+loongarch_dwarf_frame_reg_mode (int regno)
+{
+  machine_mode mode = default_dwarf_frame_reg_mode (regno);
+
+  return mode;
+}
+
+/* Implement ASM_OUTPUT_ASCII.  */
+
+void
+loongarch_output_ascii (FILE *stream, const char *string, size_t len)
+{
+  size_t i;
+  int cur_pos;
+
+  cur_pos = 17;
+  fprintf (stream, "\t.ascii\t\"");
+  for (i = 0; i < len; i++)
+    {
+      int c;
+
+      c = (unsigned char) string[i];
+      if (ISPRINT (c))
+	{
+	  if (c == '\\' || c == '\"')
+	    {
+	      putc ('\\', stream);
+	      cur_pos++;
+	    }
+	  putc (c, stream);
+	  cur_pos++;
+	}
+      else
+	{
+	  fprintf (stream, "\\%03o", c);
+	  cur_pos += 4;
+	}
+
+      if (cur_pos > 72 && i + 1 < len)
+	{
+	  cur_pos = 17;
+	  fprintf (stream, "\"\n\t.ascii\t\"");
+	}
+    }
+  fprintf (stream, "\"\n");
+}
+
+/* Emit either a label, .comm, or .lcomm directive.  When using assembler
+   macros, mark the symbol as written so that loongarch_asm_output_external
+   won't emit an .extern for it.  STREAM is the output file, NAME is the
+   name of the symbol, INIT_STRING is the string that should be written
+   before the symbol and FINAL_STRING is the string that should be
+   written after it.  FINAL_STRING is a printf format that consumes the
+   remaining arguments.  */
+
+void
+loongarch_declare_object (FILE *stream, const char *name,
+			  const char *init_string, const char *final_string,
+			  ...)
+{
+  va_list ap;
+
+  fputs (init_string, stream);
+  assemble_name (stream, name);
+  va_start (ap, final_string);
+  vfprintf (stream, final_string, ap);
+  va_end (ap);
+
+  tree name_tree = get_identifier (name);
+  TREE_ASM_WRITTEN (name_tree) = 1;
+}
+
+/* Declare a common object of SIZE bytes using asm directive INIT_STRING.
+   NAME is the name of the object and ALIGN is the required alignment
+   in bytes.  TAKES_ALIGNMENT_P is true if the directive takes a third
+   alignment argument.  */
+
+void
+loongarch_declare_common_object (FILE *stream, const char *name,
+				 const char *init_string,
+				 unsigned HOST_WIDE_INT size,
+				 unsigned int align, bool takes_alignment_p)
+{
+  if (!takes_alignment_p)
+    {
+      size += (align / BITS_PER_UNIT) - 1;
+      size -= size % (align / BITS_PER_UNIT);
+      loongarch_declare_object (stream, name, init_string,
+				"," HOST_WIDE_INT_PRINT_UNSIGNED "\n", size);
+    }
+  else
+    loongarch_declare_object (stream, name, init_string,
+			      "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", size,
+			      align / BITS_PER_UNIT);
+}
+
+/* Implement ASM_OUTPUT_ALIGNED_DECL_COMMON.  This is usually the same as the
+   elfos.h version.  */
+
+void
+loongarch_output_aligned_decl_common (FILE *stream,
+				      tree decl ATTRIBUTE_UNUSED,
+				      const char *name,
+				      unsigned HOST_WIDE_INT size,
+				      unsigned int align)
+{
+  loongarch_declare_common_object (stream, name, "\n\t.comm\t", size, align,
+				   true);
+}
+
+#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
+extern int size_directive_output;
+
+/* Implement ASM_DECLARE_OBJECT_NAME.  This is like most of the standard ELF
+   definitions except that it uses loongarch_declare_object to emit the label.
+*/
+
+void
+loongarch_declare_object_name (FILE *stream, const char *name,
+			       tree decl ATTRIBUTE_UNUSED)
+{
+#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
+#ifdef USE_GNU_UNIQUE_OBJECT
+  /* As in elfos.h.  */
+  if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (decl)
+      && (!DECL_ARTIFICIAL (decl) || !TREE_READONLY (decl)))
+    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "gnu_unique_object");
+  else
+#endif
+    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
+#endif
+
+  size_directive_output = 0;
+  if (!flag_inhibit_size_directive && DECL_SIZE (decl))
+    {
+      HOST_WIDE_INT size;
+
+      size_directive_output = 1;
+      size = int_size_in_bytes (TREE_TYPE (decl));
+      ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
+    }
+
+  loongarch_declare_object (stream, name, "", ":\n");
+}
+
+/* Implement ASM_FINISH_DECLARE_OBJECT.  This is generic ELF stuff.  */
+
+void
+loongarch_finish_declare_object (FILE *stream, tree decl, int top_level,
+				 int at_end)
+{
+  const char *name;
+
+  name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+  if (!flag_inhibit_size_directive
+      && DECL_SIZE (decl) != 0
+      && !at_end
+      && top_level
+      && DECL_INITIAL (decl) == error_mark_node
+      && !size_directive_output)
+    {
+      HOST_WIDE_INT size;
+
+      size_directive_output = 1;
+      size = int_size_in_bytes (TREE_TYPE (decl));
+      ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
+    }
+}
+#endif
+
+/* Mark text contents as code or data, mainly for the purpose of correct
+   disassembly.  Emit a local symbol and set its type appropriately for
+   that purpose.  */
+
+void
+loongarch_set_text_contents_type (FILE *file ATTRIBUTE_UNUSED,
+				  const char *prefix ATTRIBUTE_UNUSED,
+				  unsigned long num ATTRIBUTE_UNUSED,
+				  bool function_p ATTRIBUTE_UNUSED)
+{
+#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
+  char buf[(sizeof (num) * 10) / 4 + 2];
+  const char *fnname;
+  char *sname;
+  rtx symbol;
+
+  sprintf (buf, "%lu", num);
+  symbol = XEXP (DECL_RTL (current_function_decl), 0);
+  fnname = targetm.strip_name_encoding (XSTR (symbol, 0));
+  sname = ACONCAT ((prefix, fnname, "_", buf, NULL));
+
+  ASM_OUTPUT_TYPE_DIRECTIVE (file, sname, function_p ? "function" : "object");
+  assemble_name (file, sname);
+  fputs (":\n", file);
+#endif
+}
+
+/* Implement TARGET_ASM_FILE_START.  */
+
+static void
+loongarch_file_start (void)
+{
+  default_file_start ();
+}
+
+
+/* Implement TARGET_FRAME_POINTER_REQUIRED.  */
+
+static bool
+loongarch_frame_pointer_required (void)
+{
+  /* If the function contains dynamic stack allocations, we need to
+     use the frame pointer to access the static parts of the frame.  */
+  if (cfun->calls_alloca)
+    return true;
+
+  return false;
+}
+
+/* Implement TARGET_CAN_ELIMINATE.  Make sure that we're not trying
+   to eliminate to the wrong hard frame pointer.  */
+
+static bool
+loongarch_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+  return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
+}
+
+/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
+   previous frame.  */
+
+rtx
+loongarch_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+  if (count != 0)
+    return const0_rtx;
+
+  return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
+}
+
+/* Emit code to change the current function's return address to
+   ADDRESS.  SCRATCH is available as a scratch register, if needed.
+   ADDRESS and SCRATCH are both word-mode GPRs.  */
+
+void
+loongarch_set_return_address (rtx address, rtx scratch)
+{
+  rtx slot_address;
+
+  gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
+
+  if (frame_pointer_needed)
+    slot_address = loongarch_add_offset (scratch, hard_frame_pointer_rtx,
+					 -UNITS_PER_WORD);
+  else
+    slot_address = loongarch_add_offset (scratch, stack_pointer_rtx,
+					 cfun->machine->frame.gp_sp_offset);
+
+  loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address),
+		       address);
+}
+
+/* Implement ASM_DECLARE_FUNCTION_NAME.  */
+
+void
+loongarch_declare_function_name (FILE *stream ATTRIBUTE_UNUSED,
+				 const char *name,
+				 tree fndecl ATTRIBUTE_UNUSED)
+{
+  loongarch_start_function_definition (name);
+}
+
+/* Return true if register REGNO can store a value of mode MODE.
+   The result of this function is cached in loongarch_hard_regno_mode_ok.  */
+
+static bool
+loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
+{
+  unsigned int size;
+  enum mode_class mclass;
+
+  if (mode == FCCmode)
+    return FCC_REG_P (regno);
+
+  size = GET_MODE_SIZE (mode);
+  mclass = GET_MODE_CLASS (mode);
+
+  if (GP_REG_P (regno))
+    return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
+
+  if (FP_REG_P (regno)
+      && (((regno - FP_REG_FIRST) % MAX_FPRS_PER_FMT) == 0
+	  || (MIN_FPRS_PER_FMT == 1 && size <= UNITS_PER_FPREG)))
+    {
+      if (mclass == MODE_FLOAT
+	  || mclass == MODE_COMPLEX_FLOAT
+	  || mclass == MODE_VECTOR_FLOAT)
+	return size <= UNITS_PER_FPVALUE;
+
+      /* Allow integer modes that fit into a single register.  We need
+	 to put integers into FPRs when using instructions like CVT
+	 and TRUNC.  There's no point allowing sizes smaller than a word,
+	 because the FPU has no appropriate load/store instructions.  */
+      if (mclass == MODE_INT)
+	return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG;
+    }
+
+  return false;
+}
+
+/* Implement TARGET_HARD_REGNO_MODE_OK.  */
+
+static bool
+loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
+{
+  return loongarch_hard_regno_mode_ok_p[mode][regno];
+}
+
+/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
+
+bool
+loongarch_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
+				unsigned int new_reg ATTRIBUTE_UNUSED)
+{
+  return true;
+}
+
+/* Implement TARGET_HARD_REGNO_NREGS.  */
+
+static unsigned int
+loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
+{
+  if (FCC_REG_P (regno))
+    /* The size of FP status registers is always 4, because they only hold
+       FCCmode values, and FCCmode is always considered to be 4 bytes wide.  */
+    return (GET_MODE_SIZE (mode) + 3) / 4;
+
+  if (FP_REG_P (regno))
+    return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
+
+  /* All other registers are word-sized.  */
+  return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+}
+
+/* Implement CLASS_MAX_NREGS, taking the maximum of the cases
+   in loongarch_hard_regno_nregs.  */
+
+int
+loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
+{
+  int size;
+  HARD_REG_SET left;
+
+  size = 0x8000;
+  left = reg_class_contents[rclass];
+  if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FCC_REGS]))
+    {
+      if (loongarch_hard_regno_mode_ok (FCC_REG_FIRST, mode))
+	size = MIN (size, 4);
+
+      left &= ~reg_class_contents[FCC_REGS];
+    }
+  if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS]))
+    {
+      if (loongarch_hard_regno_mode_ok (FP_REG_FIRST, mode))
+	size = MIN (size, UNITS_PER_FPREG);
+
+      left &= ~reg_class_contents[FP_REGS];
+    }
+  if (!hard_reg_set_empty_p (left))
+    size = MIN (size, UNITS_PER_WORD);
+  return (GET_MODE_SIZE (mode) + size - 1) / size;
+}
+
+/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
+
+static bool
+loongarch_can_change_mode_class (machine_mode from, machine_mode to,
+				 reg_class_t rclass)
+{
+  /* Allow conversions between different Loongson integer vectors,
+     and between those vectors and DImode.  */
+  if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8
+      && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
+    return true;
+
+  return !reg_classes_intersect_p (FP_REGS, rclass);
+}
+
+/* Return true if moves in mode MODE can use the FPU's fmov.fmt instruction,
+*/
+
+static bool
+loongarch_mode_ok_for_mov_fmt_p (machine_mode mode)
+{
+  switch (mode)
+    {
+    case E_FCCmode:
+    case E_SFmode:
+      return TARGET_HARD_FLOAT;
+
+    case E_DFmode:
+      return TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT;
+
+    case E_V2SFmode:
+      return 0;
+
+    default:
+      return 0;
+    }
+}
+
+/* Implement TARGET_MODES_TIEABLE_P.  */
+
+static bool
+loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2)
+{
+  /* FPRs allow no mode punning, so it's not worth tying modes if we'd
+     prefer to put one of them in FPRs.  */
+  return (mode1 == mode2
+	  || (!loongarch_mode_ok_for_mov_fmt_p (mode1)
+	      && !loongarch_mode_ok_for_mov_fmt_p (mode2)));
+}
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS.  */
+
+static reg_class_t
+loongarch_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+  if (reg_class_subset_p (FP_REGS, rclass)
+      && loongarch_mode_ok_for_mov_fmt_p (GET_MODE (x)))
+    return FP_REGS;
+
+  if (reg_class_subset_p (GR_REGS, rclass))
+    rclass = GR_REGS;
+
+  return rclass;
+}
+
+/* RCLASS is a class involved in a REGISTER_MOVE_COST calculation.
+   Return a "canonical" class to represent it in later calculations.  */
+
+static reg_class_t
+loongarch_canonicalize_move_class (reg_class_t rclass)
+{
+  if (reg_class_subset_p (rclass, GENERAL_REGS))
+    rclass = GENERAL_REGS;
+
+  return rclass;
+}
+
+/* Return the cost of moving a value from a register of class FROM to a GPR.
+   Return 0 for classes that are unions of other classes handled by this
+   function.  */
+
+static int
+loongarch_move_to_gpr_cost (reg_class_t from)
+{
+  switch (from)
+    {
+    case GENERAL_REGS:
+      /* MOVE macro.  */
+      return 2;
+
+    case FP_REGS:
+      /* MOVFR2GR, etc.  */
+      return 4;
+
+    default:
+      return 0;
+    }
+}
+
+/* Return the cost of moving a value from a GPR to a register of class TO.
+   Return 0 for classes that are unions of other classes handled by this
+   function.  */
+
+static int
+loongarch_move_from_gpr_cost (reg_class_t to)
+{
+  switch (to)
+    {
+    case GENERAL_REGS:
+      /*MOVE macro.  */
+      return 2;
+
+    case FP_REGS:
+      /* MOVGR2FR, etc.  */
+      return 4;
+
+    default:
+      return 0;
+    }
+}
+
+/* Implement TARGET_REGISTER_MOVE_COST.  Return 0 for classes that are the
+   maximum of the move costs for subclasses; regclass will work out
+   the maximum for us.  */
+
+static int
+loongarch_register_move_cost (machine_mode mode, reg_class_t from,
+			      reg_class_t to)
+{
+  reg_class_t dregs;
+  int cost1, cost2;
+
+  from = loongarch_canonicalize_move_class (from);
+  to = loongarch_canonicalize_move_class (to);
+
+  /* Handle moves that can be done without using general-purpose registers.  */
+  if (from == FP_REGS)
+    {
+      if (to == FP_REGS && loongarch_mode_ok_for_mov_fmt_p (mode))
+	/* FMOV.FMT.  */
+	return 4;
+    }
+
+  /* Handle cases in which only one class deviates from the ideal.  */
+  dregs = GENERAL_REGS;
+  if (from == dregs)
+    return loongarch_move_from_gpr_cost (to);
+  if (to == dregs)
+    return loongarch_move_to_gpr_cost (from);
+
+  /* Handles cases that require a GPR temporary.  */
+  cost1 = loongarch_move_to_gpr_cost (from);
+  if (cost1 != 0)
+    {
+      cost2 = loongarch_move_from_gpr_cost (to);
+      if (cost2 != 0)
+	return cost1 + cost2;
+    }
+
+  return 0;
+}
+
+/* Implement TARGET_MEMORY_MOVE_COST.  */
+
+static int
+loongarch_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
+{
+  return (loongarch_cost->memory_latency
+	  + memory_move_secondary_cost (mode, rclass, in));
+}
+
+/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
+
+   This can be achieved using MOVFRH2GR.S/MOVGR2FRH.W when these instructions
+   are available but otherwise moves must go via memory.  Using
+   MOVGR2FR/MOVFR2GR to access the lower-half of these registers would require
+   a forbidden single-precision access.  We require all double-word moves to
+   use memory because adding even and odd floating-point registers classes
+   would have a significant impact on the backend.  */
+
+static bool
+loongarch_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
+				   reg_class_t class1, reg_class_t class2)
+{
+  /* Ignore spilled pseudos.  */
+  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
+    return false;
+
+  return false;
+}
+
+/* Return the register class required for a secondary register when
+   copying between one of the registers in RCLASS and value X, which
+   has mode MODE.  X is the source of the move if IN_P, otherwise it
+   is the destination.  Return NO_REGS if no secondary register is
+   needed.  */
+
+static reg_class_t
+loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
+			    reg_class_t rclass, machine_mode mode,
+			    secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+  int regno;
+
+  regno = true_regnum (x);
+
+  if (reg_class_subset_p (rclass, FP_REGS))
+    {
+      if (regno < 0
+	  || (MEM_P (x)
+	      && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)))
+	/* In this case we can use fld.s, fst.s, fld.d or fst.d.  */
+	return NO_REGS;
+
+      if (GP_REG_P (regno) || x == CONST0_RTX (mode))
+	/* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or
+	 * movfr2gr.d.  */
+	return NO_REGS;
+
+      if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (mode, x))
+	/* We can force the constant to memory and use fld.s
+	   and fld.d.  As above, we will use pairs of lwc1s if
+	   ldc1 is not supported.  */
+	return NO_REGS;
+
+      if (FP_REG_P (regno) && loongarch_mode_ok_for_mov_fmt_p (mode))
+	/* In this case we can use fmov.{s/d}.  */
+	return NO_REGS;
+
+      /* Otherwise, we need to reload through an integer register.  */
+      return GR_REGS;
+    }
+  if (FP_REG_P (regno))
+    return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;
+
+  return NO_REGS;
+}
+
+/* Implement TARGET_VALID_POINTER_MODE.  */
+
+static bool
+loongarch_valid_pointer_mode (scalar_int_mode mode)
+{
+  return mode == SImode || (TARGET_64BIT && mode == DImode);
+}
+
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.  */
+
+static bool
+loongarch_scalar_mode_supported_p (scalar_mode mode)
+{
+  if (ALL_FIXED_POINT_MODE_P (mode)
+      && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
+    return true;
+
+  return default_scalar_mode_supported_p (mode);
+}
+
+/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
+
+static machine_mode
+loongarch_preferred_simd_mode (scalar_mode mode ATTRIBUTE_UNUSED)
+{
+  return word_mode;
+}
+
+/* Return the assembly code for INSN, which has the operands given by
+   OPERANDS, and which branches to OPERANDS[0] if some condition is true.
+   BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0]
+   is in range of a direct branch.  BRANCH_IF_FALSE is an inverted
+   version of BRANCH_IF_TRUE.  */
+
+const char *
+loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands,
+				     const char *branch_if_true,
+				     const char *branch_if_false)
+{
+  unsigned int length;
+  rtx taken;
+
+  gcc_assert (LABEL_P (operands[0]));
+
+  length = get_attr_length (insn);
+  if (length <= 4)
+    {
+      return branch_if_true;
+    }
+
+  /* Generate a reversed branch around a direct jump.  */
+  rtx_code_label *not_taken = gen_label_rtx ();
+  taken = operands[0];
+
+  /* Generate the reversed branch to NOT_TAKEN.  */
+  operands[0] = not_taken;
+  output_asm_insn (branch_if_false, operands);
+
+  output_asm_insn ("b\t%0", &taken);
+
+  /* Output NOT_TAKEN.  */
+  targetm.asm_out.internal_label (asm_out_file, "L",
+				  CODE_LABEL_NUMBER (not_taken));
+  return "";
+}
+
+/* Return the assembly code for INSN, which branches to OPERANDS[0]
+   if some equality condition is true.  The condition is given by
+   OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
+   OPERANDS[1].  OPERANDS[2] is the comparison's first operand;
+   OPERANDS[3] is the second operand and may be zero or a register.  */
+
+const char *
+loongarch_output_equal_conditional_branch (rtx_insn *insn, rtx *operands,
+					   bool inverted_p)
+{
+  const char *branch[2];
+  if (operands[3] == const0_rtx)
+    {
+      branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0");
+      branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0");
+    }
+  else
+    {
+      branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%z3,%0");
+      branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%z3,%0");
+    }
+
+  return loongarch_output_conditional_branch (insn, operands, branch[1],
+					      branch[0]);
+}
+
+/* Return the assembly code for INSN, which branches to OPERANDS[0]
+   if some ordering condition is true.  The condition is given by
+   OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
+   OPERANDS[1].  OPERANDS[2] is the comparison's first operand;
+   OPERANDS[3] is the second operand and may be zero or a register.  */
+
+const char *
+loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands,
+					   bool inverted_p)
+{
+  const char *branch[2];
+
+  /* Make BRANCH[1] branch to OPERANDS[0] when the condition is true.
+     Make BRANCH[0] branch on the inverse condition.  */
+  if (operands[3] != const0_rtx)
+    {
+      /* Handle degenerate cases that should not, but do, occur.  */
+      if (REGNO (operands[2]) == REGNO (operands[3]))
+	{
+	  switch (GET_CODE (operands[1]))
+	    {
+	    case LT:
+	    case LTU:
+	    case GT:
+	    case GTU:
+	      inverted_p = !inverted_p;
+	      /* Fall through.  */
+	    case LE:
+	    case LEU:
+	    case GE:
+	    case GEU:
+	      branch[!inverted_p] = LARCH_BRANCH ("b", "%0");
+	      branch[inverted_p] = "\t# branch never";
+	      break;
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+      else
+	{
+	  switch (GET_CODE (operands[1]))
+	    {
+	    case LE:
+	    case LEU:
+	    case GT:
+	    case GTU:
+	    case LT:
+	    case LTU:
+	    case GE:
+	    case GEU:
+	      branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0");
+	      branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0");
+	      break;
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+    }
+  else
+    {
+      switch (GET_CODE (operands[1]))
+	{
+	  /* These cases are equivalent to comparisons against zero.  */
+	case LEU:
+	case GTU:
+	case LTU:
+	case GEU:
+	case LE:
+	case GT:
+	case LT:
+	case GE:
+	  branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,$r0,%0");
+	  branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,$r0,%0");
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+    }
+  return loongarch_output_conditional_branch (insn, operands, branch[1],
+					      branch[0]);
+}
+
+/* Return the assembly code for DIV.{W/D} instruction DIVISION, which has
+   the operands given by OPERANDS.  Add in a divide-by-zero check if needed.
+   */
+
+const char *
+loongarch_output_division (const char *division, rtx *operands)
+{
+  const char *s;
+
+  s = division;
+  if (TARGET_CHECK_ZERO_DIV)
+    {
+      output_asm_insn (s, operands);
+      s = "bne\t%2,%.,1f\n\tbreak\t7\n1:";
+    }
+  return s;
+}
+
+/* Implement TARGET_SCHED_ADJUST_COST.  We assume that anti and output
+   dependencies have no cost.  */
+
+static int
+loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost,
+		       unsigned int)
+{
+  if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT))
+    return 0;
+  return cost;
+}
+
+/* Return the number of instructions that can be issued per cycle.  */
+
+static int
+loongarch_issue_rate (void)
+{
+  if ((unsigned long) __ACTUAL_TUNE < N_TUNE_TYPES)
+    return loongarch_cpu_issue_rate[__ACTUAL_TUNE];
+  else
+    return 1;
+}
+
+/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD.  This should
+   be as wide as the scheduling freedom in the DFA.  */
+
+static int
+loongarch_multipass_dfa_lookahead (void)
+{
+  if ((unsigned long) __ACTUAL_TUNE < N_ARCH_TYPES)
+    return loongarch_cpu_multipass_dfa_lookahead[__ACTUAL_TUNE];
+  else
+    return 0;
+}
+
+/* Implement TARGET_SCHED_REORDER.  */
+
+static int
+loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED,
+			 int verbose ATTRIBUTE_UNUSED,
+			 rtx_insn **ready ATTRIBUTE_UNUSED,
+			 int *nreadyp ATTRIBUTE_UNUSED,
+			 int cycle ATTRIBUTE_UNUSED)
+{
+  return loongarch_issue_rate ();
+}
+
+/* Implement TARGET_SCHED_REORDER2.  */
+
+static int
+loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
+			  int verbose ATTRIBUTE_UNUSED,
+			  rtx_insn **ready ATTRIBUTE_UNUSED,
+			  int *nreadyp ATTRIBUTE_UNUSED,
+			  int cycle ATTRIBUTE_UNUSED)
+{
+  return cached_can_issue_more;
+}
+
+/* Implement TARGET_SCHED_INIT.  */
+
+static void
+loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED,
+		      int verbose ATTRIBUTE_UNUSED,
+		      int max_ready ATTRIBUTE_UNUSED)
+{}
+
+/* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
+
+static int
+loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED,
+			  int verbose ATTRIBUTE_UNUSED, rtx_insn *insn,
+			  int more)
+{
+  /* Ignore USEs and CLOBBERs; don't count them against the issue rate.  */
+  if (USEFUL_INSN_P (insn))
+    {
+      if (get_attr_type (insn) != TYPE_GHOST)
+	more--;
+    }
+
+  /* Instructions of type 'multi' should all be split before
+     the second scheduling pass.  */
+  gcc_assert (!reload_completed
+	      || recog_memoized (insn) < 0
+	      || get_attr_type (insn) != TYPE_MULTI);
+
+  cached_can_issue_more = more;
+  return more;
+}
+
+/* A structure representing the state of the processor pipeline.
+   Used by the loongarch_sim_* family of functions.  */
+
+struct loongarch_sim
+{
+  /* The maximum number of instructions that can be issued in a cycle.
+     (Caches loongarch_issue_rate.)  */
+  unsigned int issue_rate;
+
+  /* The current simulation time.  */
+  unsigned int time;
+
+  /* How many more instructions can be issued in the current cycle.  */
+  unsigned int insns_left;
+
+  /* LAST_SET[X].INSN is the last instruction to set register X.
+     LAST_SET[X].TIME is the time at which that instruction was issued.
+     INSN is null if no instruction has yet set register X.  */
+  struct
+  {
+    rtx_insn *insn;
+    unsigned int time;
+  } last_set[FIRST_PSEUDO_REGISTER];
+
+  /* The pipeline's current DFA state.  */
+  state_t dfa_state;
+};
+
+/* Reset STATE to the initial simulation state.  */
+
+static void
+loongarch_sim_reset (struct loongarch_sim *state)
+{
+  curr_state = state->dfa_state;
+
+  state->time = 0;
+  state->insns_left = state->issue_rate;
+  memset (&state->last_set, 0, sizeof (state->last_set));
+  state_reset (curr_state);
+
+  targetm.sched.init (0, false, 0);
+  advance_state (curr_state);
+}
+
+/* Initialize STATE before its first use.  DFA_STATE points to an
+   allocated but uninitialized DFA state.  */
+
+static void
+loongarch_sim_init (struct loongarch_sim *state, state_t dfa_state)
+{
+  if (targetm.sched.init_dfa_pre_cycle_insn)
+    targetm.sched.init_dfa_pre_cycle_insn ();
+
+  if (targetm.sched.init_dfa_post_cycle_insn)
+    targetm.sched.init_dfa_post_cycle_insn ();
+
+  state->issue_rate = loongarch_issue_rate ();
+  state->dfa_state = dfa_state;
+  loongarch_sim_reset (state);
+}
+
+/* Set up costs based on the current architecture and tuning settings.  */
+
+static void
+loongarch_set_tuning_info (void)
+{
+  dfa_start ();
+
+  struct loongarch_sim state;
+  loongarch_sim_init (&state, alloca (state_size ()));
+
+  dfa_finish ();
+}
+
+/* Implement TARGET_EXPAND_TO_RTL_HOOK.  */
+
+static void
+loongarch_expand_to_rtl_hook (void)
+{
+  /* We need to call this at a point where we can safely create sequences
+     of instructions, so TARGET_OVERRIDE_OPTIONS is too early.  We also
+     need to call it at a point where the DFA infrastructure is not
+     already in use, so we can't just call it lazily on demand.
+
+     At present, loongarch_tuning_info is only needed during post-expand
+     RTL passes such as split_insns, so this hook should be early enough.
+     We may need to move the call elsewhere if loongarch_tuning_info starts
+     to be used for other things (such as rtx_costs, or expanders that
+     could be called during gimple optimization).  */
+
+  loongarch_set_tuning_info ();
+}
+
+
+/* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY),
+   return the first operand of the associated PREF or PREFX insn.  */
+
+rtx
+loongarch_prefetch_cookie (rtx write, rtx locality)
+{
+  /* store_streamed / load_streamed.  */
+  if (INTVAL (locality) <= 0)
+    return GEN_INT (INTVAL (write) + 4);
+
+  /* store / load.  */
+  if (INTVAL (locality) <= 2)
+    return write;
+
+  /* store_retained / load_retained.  */
+  return GEN_INT (INTVAL (write) + 6);
+}
+
+/* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Generate rtl rather than asm text
+   in order to avoid duplicating too much logic from elsewhere.  */
+
+static void
+loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+			   HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+			   tree function)
+{
+  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
+  rtx this_rtx, temp1, temp2, fnaddr;
+  rtx_insn *insn;
+  bool use_sibcall_p;
+
+  /* Pretend to be a post-reload pass while generating rtl.  */
+  reload_completed = 1;
+
+  /* Mark the end of the (empty) prologue.  */
+  emit_note (NOTE_INSN_PROLOGUE_END);
+
+  /* Determine if we can use a sibcall to call FUNCTION directly.  */
+  fnaddr = XEXP (DECL_RTL (function), 0);
+  use_sibcall_p = const_call_insn_operand (fnaddr, Pmode);
+
+  /* We need two temporary registers in some cases.  */
+  temp1 = gen_rtx_REG (Pmode, 12);
+  temp2 = gen_rtx_REG (Pmode, 13);
+
+  /* Find out which register contains the "this" pointer.  */
+  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+    this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
+  else
+    this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
+
+  /* Add DELTA to THIS_RTX.  */
+  if (delta != 0)
+    {
+      rtx offset = GEN_INT (delta);
+      if (!IMM12_OPERAND (delta))
+	{
+	  loongarch_emit_move (temp1, offset);
+	  offset = temp1;
+	}
+      emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
+    }
+
+  /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
+  if (vcall_offset != 0)
+    {
+      rtx addr;
+
+      /* Set TEMP1 to *THIS_RTX.  */
+      loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
+
+      /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET.  */
+      addr = loongarch_add_offset (temp2, temp1, vcall_offset);
+
+      /* Load the offset and add it to THIS_RTX.  */
+      loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
+      emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
+    }
+
+  /* Jump to the target function.  Use a sibcall if direct jumps are
+     allowed, otherwise load the address into a register first.  */
+  if (use_sibcall_p)
+    {
+      insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
+      SIBLING_CALL_P (insn) = 1;
+    }
+  else
+    {
+      loongarch_emit_move (temp1, fnaddr);
+      emit_jump_insn (gen_indirect_jump (temp1));
+    }
+
+  /* Run just enough of rest_of_compilation.  This sequence was
+     "borrowed" from alpha.c.  */
+  insn = get_insns ();
+  split_all_insns_noflow ();
+  shorten_branches (insn);
+  assemble_start_function (thunk_fndecl, fnname);
+  final_start_function (insn, file, 1);
+  final (insn, file, 1);
+  final_end_function ();
+  assemble_end_function (thunk_fndecl, fnname);
+
+  /* Stop pretending to be a post-reload pass.  */
+  reload_completed = 0;
+}
+
+/* Allocate a chunk of memory for per-function machine-dependent data.  */
+
+static struct machine_function *
+loongarch_init_machine_status (void)
+{
+  return ggc_cleared_alloc<machine_function> ();
+}
+
+static void
+loongarch_option_override_internal (struct gcc_options *opts,
+				    struct gcc_options *opts_set);
+
+/* Implement TARGET_OPTION_OVERRIDE.  */
+
+static void
+loongarch_option_override (void)
+{
+  loongarch_option_override_internal (&global_options, &global_options_set);
+}
+
+static void
+loongarch_option_override_internal (struct gcc_options *opts,
+				    struct gcc_options *opts_set)
+{
+  int i, regno, mode;
+
+  (void) opts_set;
+
+  if (flag_pic)
+    g_switch_value = 0;
+
+  /* Handle target-specific options: compute defaults/conflicts etc.  */
+  loongarch_config_target (&la_target, la_opt_switches,
+			   la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu,
+			   la_opt_abi_base, la_opt_abi_ext, la_opt_cmodel, 0);
+
+  /* End of code shared with GAS.  */
+  if (TARGET_ABI_LP64)
+    flag_pcc_struct_return = 0;
+
+  /* Decide which rtx_costs structure to use.  */
+  if (optimize_size)
+    loongarch_cost = &loongarch_rtx_cost_optimize_size;
+  else
+    loongarch_cost = &loongarch_cpu_rtx_cost_data[__ACTUAL_TUNE];
+
+  /* If the user hasn't specified a branch cost, use the processor's
+     default.  */
+  if (loongarch_branch_cost == 0)
+    loongarch_branch_cost = loongarch_cost->branch_cost;
+
+
+  switch (la_target.cmodel)
+    {
+      case CMODEL_TINY_STATIC:
+      case CMODEL_EXTREME:
+	if (opts->x_flag_plt)
+	  error ("code model %qs and %qs not support %s mode",
+		 "tiny-static", "extreme", "plt");
+	break;
+
+      case CMODEL_NORMAL:
+      case CMODEL_TINY:
+      case CMODEL_LARGE:
+	break;
+
+      default:
+	gcc_unreachable ();
+    }
+
+  loongarch_init_print_operand_punct ();
+
+  /* Set up array to map GCC register number to debug register number.
+     Ignore the special purpose register numbers.  */
+
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      if (GP_REG_P (i) || FP_REG_P (i))
+	loongarch_dwarf_regno[i] = i;
+      else
+	loongarch_dwarf_regno[i] = INVALID_REGNUM;
+    }
+
+  /* Set up loongarch_hard_regno_mode_ok.  */
+  for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
+    for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+      loongarch_hard_regno_mode_ok_p[mode][regno]
+	= loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode);
+
+  /* Function to allocate machine-dependent function status.  */
+  init_machine_status = &loongarch_init_machine_status;
+}
+
+/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
+
+static void
+loongarch_conditional_register_usage (void)
+{
+  if (!TARGET_HARD_FLOAT)
+    accessible_reg_set &= ~(reg_class_contents[FP_REGS]
+			    | reg_class_contents[FCC_REGS]);
+}
+
+/* Implement EH_USES.  */
+
+bool
+loongarch_eh_uses (unsigned int regno ATTRIBUTE_UNUSED)
+{
+  return false;
+}
+
+/* Implement EPILOGUE_USES.  */
+
+bool
+loongarch_epilogue_uses (unsigned int regno)
+{
+  /* Say that the epilogue uses the return address register.  Note that
+     in the case of sibcalls, the values "used by the epilogue" are
+     considered live at the start of the called function.  */
+  if (regno == RETURN_ADDR_REGNUM)
+    return true;
+
+  return false;
+}
+
+bool
+loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p)
+{
+  rtx reg1, reg2, mem1, mem2, base1, base2;
+  enum reg_class rc1, rc2;
+  HOST_WIDE_INT offset1, offset2;
+
+  if (load_p)
+    {
+      reg1 = operands[0];
+      reg2 = operands[2];
+      mem1 = operands[1];
+      mem2 = operands[3];
+    }
+  else
+    {
+      reg1 = operands[1];
+      reg2 = operands[3];
+      mem1 = operands[0];
+      mem2 = operands[2];
+    }
+
+  if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0
+      || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0)
+    return false;
+
+  loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1);
+  loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2);
+
+  /* Base regs do not match.  */
+  if (!REG_P (base1) || !rtx_equal_p (base1, base2))
+    return false;
+
+  /* Either of the loads is clobbering base register.  It is legitimate to bond
+     loads if second load clobbers base register.  However, hardware does not
+     support such bonding.  */
+  if (load_p
+      && (REGNO (reg1) == REGNO (base1) || (REGNO (reg2) == REGNO (base1))))
+    return false;
+
+  /* Loading in same registers.  */
+  if (load_p && REGNO (reg1) == REGNO (reg2))
+    return false;
+
+  /* The loads/stores are not of same type.  */
+  rc1 = REGNO_REG_CLASS (REGNO (reg1));
+  rc2 = REGNO_REG_CLASS (REGNO (reg2));
+  if (rc1 != rc2 && !reg_class_subset_p (rc1, rc2)
+      && !reg_class_subset_p (rc2, rc1))
+    return false;
+
+  if (abs (offset1 - offset2) != GET_MODE_SIZE (mode))
+    return false;
+
+  return true;
+}
+
+/* Implement TARGET_TRAMPOLINE_INIT.  */
+
+static void
+loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+  rtx addr, end_addr, mem;
+  rtx trampoline[8];
+  unsigned int i, j;
+  HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset;
+
+  /* Work out the offsets of the pointers from the start of the
+     trampoline code.  */
+  end_addr_offset = TRAMPOLINE_CODE_SIZE;
+  static_chain_offset = end_addr_offset;
+  target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
+
+  /* Get pointers to the beginning and end of the code block.  */
+  addr = force_reg (Pmode, XEXP (m_tramp, 0));
+  end_addr
+    = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset));
+
+#define OP(X) gen_int_mode (X, SImode)
+
+  /* Build up the code in TRAMPOLINE.  */
+  i = 0;
+  /*pcaddi $static_chain,0
+    ld.[dw] $tmp,$static_chain,target_function_offset
+    ld.[dw] $static_chain,$static_chain,static_chain_offset
+    jirl $r0,$tmp,0  */
+  trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST));
+  trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
+			| 19 /* $t7  */
+			| ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
+			| ((target_function_offset & 0xfff) << 10));
+  trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
+			| (STATIC_CHAIN_REGNUM - GP_REG_FIRST)
+			| ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
+			| ((static_chain_offset & 0xfff) << 10));
+  trampoline[i++] = OP (0x4c000000 | (19 << 5));
+#undef OP
+
+  for (j = 0; j < i; j++)
+   {
+     mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode));
+     loongarch_emit_move (mem, trampoline[j]);
+   }
+
+  /* Set up the static chain pointer field.  */
+  mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
+  loongarch_emit_move (mem, chain_value);
+
+  /* Set up the target function field.  */
+  mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
+  loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
+
+  /* Flush the code part of the trampoline.  */
+  emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
+  emit_insn (gen_clear_cache (addr, end_addr));
+}
+
+/* Implement TARGET_SHIFT_TRUNCATION_MASK.  We want to keep the default
+   behavior of TARGET_SHIFT_TRUNCATION_MASK for non-vector modes.  */
+
+static unsigned HOST_WIDE_INT
+loongarch_shift_truncation_mask (machine_mode mode)
+{
+  return GET_MODE_BITSIZE (mode) - 1;
+}
+
+/* Implement TARGET_SCHED_REASSOCIATION_WIDTH.  */
+
+static int
+loongarch_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
+				     machine_mode mode ATTRIBUTE_UNUSED)
+{
+  return 1;
+}
+
+/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
+
+machine_mode
+loongarch_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
+				       machine_mode mode)
+{
+  /* For performance, avoid saving/restoring upper parts of a register
+     by returning MODE as save mode when the mode is known.  */
+  if (mode == VOIDmode)
+    return choose_hard_reg_mode (regno, nregs, NULL);
+  else
+    return mode;
+}
+
+/* Implement TARGET_CASE_VALUES_THRESHOLD.  */
+
+unsigned int
+loongarch_case_values_threshold (void)
+{
+  return default_case_values_threshold ();
+}
+
+/* Implement TARGET_SPILL_CLASS.  */
+
+static reg_class_t
+loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED,
+		       machine_mode mode ATTRIBUTE_UNUSED)
+{
+  return NO_REGS;
+}
+
+/* Implement TARGET_LRA_P.  */
+
+static bool
+loongarch_lra_p (void)
+{
+  return loongarch_lra_flag;
+}
+
+/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.  */
+
+static reg_class_t
+loongarch_ira_change_pseudo_allocno_class (int regno,
+					   reg_class_t allocno_class,
+					   reg_class_t best_class     \
+					   ATTRIBUTE_UNUSED)
+{
+  /* LRA will allocate an FPR for an integer mode pseudo instead of spilling
+     to memory if an FPR is present in the allocno class.  It is rare that
+     we actually need to place an integer mode value in an FPR so where
+     possible limit the allocation to GR_REGS.  This will slightly pessimize
+     code that involves integer to/from float conversions as these will have
+     to reload into FPRs in LRA.  Such reloads are sometimes eliminated and
+     sometimes only partially eliminated.  We choose to take this penalty
+     in order to eliminate usage of FPRs in code that does not use floating
+     point data.
+
+     This change has a similar effect to increasing the cost of FPR->GPR
+     register moves for integer modes so that they are higher than the cost
+     of memory but changing the allocno class is more reliable.
+
+     This is also similar to forbidding integer mode values in FPRs entirely
+     but this would lead to an inconsistency in the integer to/from float
+     instructions that say integer mode values must be placed in FPRs.  */
+  if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS)
+    return GR_REGS;
+  return allocno_class;
+}
+
+/* Implement TARGET_PROMOTE_FUNCTION_MODE.  */
+
+/* This function is equivalent to default_promote_function_mode_always_promote
+   except that it returns a promoted mode even if type is NULL_TREE.  This is
+   needed by libcalls which have no type (only a mode) such as fixed conversion
+   routines that take a signed or unsigned char/short argument and convert it
+   to a fixed type.  */
+
+static machine_mode
+loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+				 machine_mode mode,
+				 int *punsignedp ATTRIBUTE_UNUSED,
+				 const_tree fntype ATTRIBUTE_UNUSED,
+				 int for_return ATTRIBUTE_UNUSED)
+{
+  int unsignedp;
+
+  if (type != NULL_TREE)
+    return promote_mode (type, mode, punsignedp);
+
+  unsignedp = *punsignedp;
+  PROMOTE_MODE (mode, unsignedp, type);
+  *punsignedp = unsignedp;
+  return mode;
+}
+
+/* Implement TARGET_TRULY_NOOP_TRUNCATION.  */
+
+static bool
+loongarch_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
+{
+  return !TARGET_64BIT || inprec <= 32 || outprec > 32;
+}
+
+/* Implement TARGET_CONSTANT_ALIGNMENT.  */
+
+static HOST_WIDE_INT
+loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align)
+{
+  if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
+    return MAX (align, BITS_PER_WORD);
+  return align;
+}
+
+/* Implement TARGET_STARTING_FRAME_OFFSET.  See loongarch_compute_frame_info
+   for details about the frame layout.  */
+
+static HOST_WIDE_INT
+loongarch_starting_frame_offset (void)
+{
+  if (FRAME_GROWS_DOWNWARD)
+    return 0;
+  return crtl->outgoing_args_size;
+}
+
+/* Initialize the GCC target structure.  */
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE loongarch_option_override
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address
+
+#undef TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION loongarch_select_rtx_section
+#undef TARGET_ASM_FUNCTION_RODATA_SECTION
+#define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section
+
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT loongarch_sched_init
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER loongarch_sched_reorder
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 loongarch_sched_reorder2
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE loongarch_variable_issue
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST loongarch_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE loongarch_issue_rate
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+  loongarch_multipass_dfa_lookahead
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL loongarch_function_ok_for_sibcall
+
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE loongarch_valid_pointer_mode
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST loongarch_register_move_cost
+#undef TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST loongarch_memory_move_cost
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS loongarch_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST loongarch_address_cost
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P loongarch_in_small_data_p
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS loongarch_preferred_reload_class
+
+#undef TARGET_EXPAND_TO_RTL_HOOK
+#define TARGET_EXPAND_TO_RTL_HOOK loongarch_expand_to_rtl_hook
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START loongarch_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START loongarch_va_start
+
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE loongarch_promote_function_mode
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY loongarch_return_in_memory
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE loongarch_function_value
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE loongarch_libcall_value
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK loongarch_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
+  hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND loongarch_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS loongarch_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P \
+  loongarch_print_operand_punct_valid_p
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS loongarch_setup_incoming_varargs
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE loongarch_pass_by_reference
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES loongarch_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG loongarch_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE loongarch_function_arg_advance
+#undef TARGET_FUNCTION_ARG_BOUNDARY
+#define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P loongarch_scalar_mode_supported_p
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE loongarch_preferred_simd_mode
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS loongarch_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL loongarch_builtin_decl
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN loongarch_expand_builtin
+
+/* The generic ELF target does not always have TLS support.  */
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS HAVE_AS_TLS
+#endif
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM loongarch_cannot_force_const_mem
+
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P loongarch_legitimate_constant_p
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE loongarch_attribute_table
+/* All our function attributes are related to how out-of-line copies should
+   be compiled or called.  They don't in themselves prevent inlining.  */
+#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
+#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
+
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
+#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
+#define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p
+
+#ifdef HAVE_AS_DTPRELWORD
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL loongarch_output_dwarf_dtprel
+#endif
+#undef TARGET_DWARF_FRAME_REG_MODE
+#define TARGET_DWARF_FRAME_REG_MODE loongarch_dwarf_frame_reg_mode
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED loongarch_frame_pointer_required
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE loongarch_can_eliminate
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE loongarch_conditional_register_usage
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT loongarch_trampoline_init
+
+#undef TARGET_SHIFT_TRUNCATION_MASK
+#define TARGET_SHIFT_TRUNCATION_MASK loongarch_shift_truncation_mask
+
+#undef TARGET_SCHED_REASSOCIATION_WIDTH
+#define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width
+
+#undef TARGET_CASE_VALUES_THRESHOLD
+#define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold
+
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv
+
+#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
+#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
+
+#undef TARGET_SPILL_CLASS
+#define TARGET_SPILL_CLASS loongarch_spill_class
+#undef TARGET_LRA_P
+#define TARGET_LRA_P loongarch_lra_p
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
+  loongarch_ira_change_pseudo_allocno_class
+
+#undef TARGET_HARD_REGNO_NREGS
+#define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs
+#undef TARGET_HARD_REGNO_MODE_OK
+#define TARGET_HARD_REGNO_MODE_OK loongarch_hard_regno_mode_ok
+
+#undef TARGET_MODES_TIEABLE_P
+#define TARGET_MODES_TIEABLE_P loongarch_modes_tieable_p
+
+#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
+#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
+
+#undef TARGET_SECONDARY_MEMORY_NEEDED
+#define TARGET_SECONDARY_MEMORY_NEEDED loongarch_secondary_memory_needed
+
+#undef TARGET_CAN_CHANGE_MODE_CLASS
+#define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class
+
+#undef TARGET_TRULY_NOOP_TRUNCATION
+#define TARGET_TRULY_NOOP_TRUNCATION loongarch_truly_noop_truncation
+
+#undef TARGET_CONSTANT_ALIGNMENT
+#define TARGET_CONSTANT_ALIGNMENT loongarch_constant_alignment
+
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET loongarch_starting_frame_offset
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD loongarch_secondary_reload
+
+#undef  TARGET_HAVE_SPECULATION_SAFE_VALUE
+#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
new file mode 100644
index 00000000000..791f9d064c5
--- /dev/null
+++ b/gcc/config/loongarch/loongarch.h
@@ -0,0 +1,1273 @@ 
+/* Definitions of target machine for GNU compiler.  LoongArch version.
+   Copyright (C) 2021-2022 Free Software Foundation, Inc.
+   Contributed by Loongson Ltd.
+   Based on MIPS and RISC-V target for GNU compiler.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* LoongArch external variables defined in loongarch.c.  */
+
+#include "config/loongarch/loongarch-opts.h"
+
+/* Macros to silence warnings about numbers being signed in traditional
+   C and unsigned in ISO C when compiled on 32-bit hosts.  */
+
+#define BITMASK_HIGH (((unsigned long) 1) << 31) /* 0x80000000  */
+
+/* Run-time compilation parameters selecting different hardware subsets.  */
+
+/* Target CPU builtins.  */
+#define TARGET_CPU_CPP_BUILTINS() loongarch_cpu_cpp_builtins (pfile)
+
+/* Default target_flags if no switches are specified.  */
+
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+/* Make this compile time constant for libgcc2.  */
+#ifdef __loongarch64
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif /* IN_LIBGCC2  */
+
+#define TARGET_LIBGCC_SDATA_SECTION ".sdata"
+
+/* Support for a compile-time default CPU, et cetera.  The rules are:
+   --with-divide is ignored if -mdivide-traps or -mdivide-breaks are
+     specified.  */
+#define OPTION_DEFAULT_SPECS \
+  {"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}"},
+
+/* Driver native functions for SPEC processing in the GCC driver.  */
+#include "loongarch-driver.h"
+
+/* This definition replaces the formerly used 'm' constraint with a
+   different constraint letter in order to avoid changing semantics of
+   the 'm' constraint when accepting new address formats in
+   TARGET_LEGITIMATE_ADDRESS_P.  The constraint letter defined here
+   must not be used in insn definitions or inline assemblies.  */
+#define TARGET_MEM_CONSTRAINT 'w'
+
+/* Tell collect what flags to pass to nm.  */
+#ifndef NM_FLAGS
+#define NM_FLAGS "-Bn"
+#endif
+
+/* SUBTARGET_ASM_DEBUGGING_SPEC handles passing debugging options to
+   the assembler.  It may be overridden by subtargets.  */
+
+#ifndef SUBTARGET_ASM_DEBUGGING_SPEC
+#define SUBTARGET_ASM_DEBUGGING_SPEC "\
+%{g} %{g0} %{g1} %{g2} %{g3} \
+%{ggdb:-g} %{ggdb0:-g0} %{ggdb1:-g1} %{ggdb2:-g2} %{ggdb3:-g3} \
+%{gstabs:-g} %{gstabs0:-g0} %{gstabs1:-g1} %{gstabs2:-g2} %{gstabs3:-g3} \
+%{gstabs+:-g} %{gstabs+0:-g0} %{gstabs+1:-g1} %{gstabs+2:-g2} %{gstabs+3:-g3}"
+#endif
+
+/* SUBTARGET_ASM_SPEC is always passed to the assembler.  It may be
+   overridden by subtargets.  */
+
+#ifndef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC ""
+#endif
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{mabi=*} %{subtarget_asm_spec}"
+
+/* Extra switches sometimes passed to the linker.  */
+
+#ifndef LINK_SPEC
+#define LINK_SPEC ""
+#endif /* LINK_SPEC defined  */
+
+/* Specs for the compiler proper.  */
+
+/* CC1_SPEC is the set of arguments to pass to the compiler proper.  */
+
+#undef CC1_SPEC
+#define CC1_SPEC "\
+%{G*} \
+%(subtarget_cc1_spec)"
+
+/* Preprocessor specs.  */
+
+/* SUBTARGET_CPP_SPEC is passed to the preprocessor.  It may be
+   overridden by subtargets.  */
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#define CPP_SPEC "%(subtarget_cpp_spec)"
+
+/* This macro defines names of additional specifications to put in the specs
+   that can be used in various specifications like CC1_SPEC.  Its definition
+   is an initializer with a subgrouping for each command option.
+
+   Each subgrouping contains a string constant, that defines the
+   specification name, and a string constant that used by the GCC driver
+   program.
+
+   Do not define this macro if it does not need to do anything.  */
+
+#define EXTRA_SPECS \
+  {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
+  {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
+  {"subtarget_asm_debugging_spec", SUBTARGET_ASM_DEBUGGING_SPEC}, \
+  {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
+
+/* Registers may have a prefix which can be ignored when matching
+   user asm and register definitions.  */
+#ifndef REGISTER_PREFIX
+#define REGISTER_PREFIX "$"
+#endif
+
+/* Local compiler-generated symbols must have a prefix that the assembler
+   understands.  */
+
+#define LOCAL_LABEL_PREFIX "."
+
+/* By default on the loongarch, external symbols do not have an underscore
+   prepended.  */
+
+#define USER_LABEL_PREFIX ""
+
+#ifndef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#endif
+
+/* The size of DWARF addresses should be the same as the size of symbols
+   in the target file format.  */
+#define DWARF2_ADDR_SIZE (TARGET_64BIT ? 8 : 4)
+
+/* By default, turn on GDB extensions.  */
+#define DEFAULT_GDB_EXTENSIONS 1
+
+/* By default, produce dwarf version 2 format debugging output in response
+   to the ‘-g’ option.  */
+#define DWARF2_DEBUGGING_INFO 1
+
+/* The mapping from gcc register number to DWARF 2 CFA column number.  */
+#define DWARF_FRAME_REGNUM(REGNO) loongarch_dwarf_regno[REGNO]
+
+/* The DWARF 2 CFA column which tracks the return address.  */
+#define DWARF_FRAME_RETURN_COLUMN RETURN_ADDR_REGNUM
+
+/* Before the prologue, RA lives in r1.  */
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)
+
+/* Describe how we implement __builtin_eh_return.  */
+#define EH_RETURN_DATA_REGNO(N) \
+  ((N) < (4) ? (N) + GP_ARG_FIRST : INVALID_REGNUM)
+
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, GP_ARG_FIRST + 4)
+
+#define EH_USES(N) loongarch_eh_uses (N)
+
+/* Offsets recorded in opcodes are a multiple of this alignment factor.
+   The default for this in 64-bit mode is 8, which causes problems with
+   SFmode register saves.  */
+#define DWARF_CIE_DATA_ALIGNMENT -4
+
+/* Target machine storage layout.  */
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 0
+#define WORDS_BIG_ENDIAN 0
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width of a word, in units (bytes).  */
+#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#ifndef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* For LARCH, width of a floating point register.  */
+#define UNITS_PER_FPREG (TARGET_DOUBLE_FLOAT ? 8 : 4)
+
+/* The number of consecutive floating-point registers needed to store the
+   largest format supported by the FPU.  */
+#define MAX_FPRS_PER_FMT 1
+
+/* The number of consecutive floating-point registers needed to store the
+   smallest format supported by the FPU.  */
+#define MIN_FPRS_PER_FMT 1
+
+/* The largest size of value that can be held in floating-point
+   registers and moved with a single instruction.  */
+#define UNITS_PER_HWFPVALUE \
+  (TARGET_SOFT_FLOAT ? 0 : MAX_FPRS_PER_FMT * UNITS_PER_FPREG)
+
+/* The largest size of value that can be held in floating-point
+   registers.  */
+#define UNITS_PER_FPVALUE \
+  (TARGET_SOFT_FLOAT ? 0 \
+   : TARGET_SINGLE_FLOAT ? UNITS_PER_FPREG \
+			 : LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
+
+/* The number of bytes in a double.  */
+#define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
+
+/* Set the sizes of the core types.  */
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE (TARGET_64BIT ? 64 : 32)
+#define LONG_LONG_TYPE_SIZE 64
+
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE (TARGET_64BIT ? 128 : 64)
+
+/* Define the sizes of fixed-point types.  */
+#define SHORT_FRACT_TYPE_SIZE 8
+#define FRACT_TYPE_SIZE 16
+#define LONG_FRACT_TYPE_SIZE 32
+#define LONG_LONG_FRACT_TYPE_SIZE 64
+
+#define SHORT_ACCUM_TYPE_SIZE 16
+#define ACCUM_TYPE_SIZE 32
+#define LONG_ACCUM_TYPE_SIZE 64
+#define LONG_LONG_ACCUM_TYPE_SIZE (TARGET_64BIT ? 128 : 64)
+
+/* long double is not a fixed mode, but the idea is that, if we
+   support long double, we also want a 128-bit integer type.  */
+#define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE
+
+/* Width in bits of a pointer.  */
+#ifndef POINTER_SIZE
+#define POINTER_SIZE (TARGET_64BIT ? 64 : 32)
+#endif
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list.  */
+#define PARM_BOUNDARY BITS_PER_WORD
+
+/* Allocation boundary (in *bits*) for the code of a function.  */
+#define FUNCTION_BOUNDARY 32
+
+/* Alignment of field after `int : 0' in a structure.  */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* Number of bits which any structure or union's size must be a multiple of.
+   Each structure or union's size is rounded up to a multiple of this.  */
+#define STRUCTURE_SIZE_BOUNDARY 8
+
+/* There is no point aligning anything to a rounder boundary than
+   LONG_DOUBLE_TYPE_SIZE.  */
+#define BIGGEST_ALIGNMENT (LONG_DOUBLE_TYPE_SIZE)
+
+/* All accesses must be aligned.  */
+#define STRICT_ALIGNMENT (TARGET_STRICT_ALIGN)
+
+/* Define this if you wish to imitate the way many other C compilers
+   handle alignment of bitfields and the structures that contain
+   them.
+
+   The behavior is that the type written for a bit-field (`int',
+   `short', or other integer type) imposes an alignment for the
+   entire structure, as if the structure really did contain an
+   ordinary field of that type.  In addition, the bit-field is placed
+   within the structure so that it would fit within such a field,
+   not crossing a boundary for it.
+
+   Thus, on most machines, a bit-field whose type is written as `int'
+   would not cross a four-byte boundary, and would force four-byte
+   alignment for the whole structure.  (The alignment used may not
+   be four bytes; it is controlled by the other alignment
+   parameters.)
+
+   If the macro is defined, its definition should be a C expression;
+   a nonzero value for the expression enables this behavior.  */
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* If defined, a C expression to compute the alignment for a static
+   variable.  TYPE is the data type, and ALIGN is the alignment that
+   the object would ordinarily have.  The value of this macro is used
+   instead of that alignment to align the object.
+
+   If this macro is not defined, then ALIGN is used.
+
+   One use of this macro is to increase alignment of medium-size
+   data to make it all fit in fewer cache lines.  Another is to
+   cause character arrays to be word-aligned so that `strcpy' calls
+   that copy constants to character arrays can be done inline.  */
+
+#undef DATA_ALIGNMENT
+#define DATA_ALIGNMENT(TYPE, ALIGN)					\
+  ((((ALIGN) < BITS_PER_WORD)						\
+    && (TREE_CODE (TYPE) == ARRAY_TYPE					\
+	|| TREE_CODE (TYPE) == UNION_TYPE				\
+	|| TREE_CODE (TYPE) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN))
+
+/* We need this for the same reason as DATA_ALIGNMENT, namely to cause
+   character arrays to be word-aligned so that `strcpy' calls that copy
+   constants to character arrays can be done inline, and 'strcmp' can be
+   optimised to use word loads.  */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) DATA_ALIGNMENT (TYPE, ALIGN)
+
+/* Define if operations between registers always perform the operation
+   on the full register even if a narrower mode is specified.  */
+#define WORD_REGISTER_OPERATIONS 1
+
+/* When in 64-bit mode, move insns will sign extend SImode and FCCmode
+   moves.  All other references are zero extended.  */
+#define LOAD_EXTEND_OP(MODE) \
+  (TARGET_64BIT && ((MODE) == SImode || (MODE) == FCCmode) ? SIGN_EXTEND \
+							   : ZERO_EXTEND)
+
+/* Define this macro if it is advisable to hold scalars in registers
+   in a wider mode than that declared by the program.  In such cases,
+   the value is constrained to be within the bounds of the declared
+   type, but kept valid in the wider mode.  The signedness of the
+   extension may differ from that of the type.  */
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+  if (GET_MODE_CLASS (MODE) == MODE_INT \
+      && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
+    { \
+      if ((MODE) == SImode) \
+	(UNSIGNEDP) = 0; \
+      (MODE) = Pmode; \
+    }
+
+/* Pmode is always the same as ptr_mode, but not always the same as word_mode.
+   Extensions of pointers to word_mode must be signed.  */
+#define POINTERS_EXTEND_UNSIGNED false
+
+/* Define if loading short immediate values into registers sign extends.  */
+#define SHORT_IMMEDIATES_SIGN_EXTEND 1
+
+/* The clz.{w/d} instructions have the natural values at 0.  */
+
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
+  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
+
+/* Standard register usage.  */
+
+/* Number of hardware registers.  We have:
+
+   - 32 integer registers
+   - 32 floating point registers
+   - 8 condition code registers
+   - 2 fake registers:
+	- ARG_POINTER_REGNUM
+	- FRAME_POINTER_REGNUM
+*/
+
+#define FIRST_PSEUDO_REGISTER 74
+
+/* zero, tp, sp and x are fixed.  */
+#define FIXED_REGISTERS							\
+{ /* General registers.  */						\
+  1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
+  0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
+  /* Floating-point registers.  */					\
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
+  /* Others.  */							\
+  0, 0, 0, 0, 0, 0, 0, 0, 1, 1}
+
+/* The call RTLs themselves clobber ra.  */
+#define CALL_USED_REGISTERS						\
+{ /* General registers.  */						\
+  1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
+  1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
+  /* Floating-point registers.  */					\
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
+  1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,			\
+  /* Others.  */							\
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
+
+/* Internal macros to classify a register number as to whether it's a
+   general purpose register, a floating point register, or a status
+   register.  */
+
+#define GP_REG_FIRST 0
+#define GP_REG_LAST 31
+#define GP_REG_NUM (GP_REG_LAST - GP_REG_FIRST + 1)
+
+#define FP_REG_FIRST 32
+#define FP_REG_LAST 63
+#define FP_REG_NUM (FP_REG_LAST - FP_REG_FIRST + 1)
+
+/* The DWARF 2 CFA column which tracks the return address from a
+   signal handler context.  This means that to maintain backwards
+   compatibility, no hard register can be assigned this column if it
+   would need to be handled by the DWARF unwinder.  */
+#define DWARF_ALT_FRAME_RETURN_COLUMN 72
+
+#define FCC_REG_FIRST 64
+#define FCC_REG_LAST 71
+#define FCC_REG_NUM (FCC_REG_LAST - FCC_REG_FIRST + 1)
+
+#define GP_REG_P(REGNO) \
+  ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM)
+#define FP_REG_P(REGNO) \
+  ((unsigned int) ((int) (REGNO) - FP_REG_FIRST) < FP_REG_NUM)
+#define FCC_REG_P(REGNO) \
+  ((unsigned int) ((int) (REGNO) - FCC_REG_FIRST) < FCC_REG_NUM)
+
+#define FP_REG_RTX_P(X) (REG_P (X) && FP_REG_P (REGNO (X)))
+
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+  loongarch_hard_regno_rename_ok (OLD_REG, NEW_REG)
+
+/* Select a register mode required for caller save of hard regno REGNO.  */
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+  loongarch_hard_regno_caller_save_mode (REGNO, NREGS, MODE)
+
+/* Register to use for pushing function arguments.  */
+#define STACK_POINTER_REGNUM (GP_REG_FIRST + 3)
+
+/* These two registers don't really exist: they get eliminated to either
+   the stack or hard frame pointer.  */
+#define ARG_POINTER_REGNUM 72
+#define FRAME_POINTER_REGNUM 73
+
+#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 22)
+
+#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0
+#define HARD_FRAME_POINTER_IS_ARG_POINTER 0
+
+/* Register in which static-chain is passed to a function.  */
+#define STATIC_CHAIN_REGNUM (GP_REG_FIRST + 20) /* $t8  */
+
+#define GP_TEMP_FIRST (GP_REG_FIRST + 12)
+#define LARCH_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST + 1)
+#define LARCH_PROLOGUE_TEMP2_REGNUM (GP_TEMP_FIRST)
+#define LARCH_PROLOGUE_TEMP3_REGNUM (GP_TEMP_FIRST + 2)
+#define LARCH_EPILOGUE_TEMP_REGNUM (GP_TEMP_FIRST)
+
+#define CALLEE_SAVED_REG_NUMBER(REGNO) \
+  ((REGNO) >= 22 && (REGNO) <= 31 ? (REGNO) -22 : -1)
+
+#define LARCH_PROLOGUE_TEMP(MODE) \
+  gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP_REGNUM)
+#define LARCH_PROLOGUE_TEMP2(MODE) \
+  gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP2_REGNUM)
+#define LARCH_PROLOGUE_TEMP3(MODE) \
+  gen_rtx_REG (MODE, LARCH_PROLOGUE_TEMP3_REGNUM)
+#define LARCH_EPILOGUE_TEMP(MODE) \
+  gen_rtx_REG (MODE, LARCH_EPILOGUE_TEMP_REGNUM)
+
+/* Define this macro if it is as good or better to call a constant
+   function address than to call an address kept in a register.  */
+#define NO_FUNCTION_CSE 1
+
+#define THREAD_POINTER_REGNUM (GP_REG_FIRST + 2)
+
+/* Define the classes of registers for register constraints in the
+   machine description.  Also define ranges of constants.
+
+   One of the classes must always be named ALL_REGS and include all hard regs.
+   If there is more than one class, another class must be named NO_REGS
+   and contain no registers.
+
+   The name GENERAL_REGS must be the name of a class (or an alias for
+   another name such as ALL_REGS).  This is the class of registers
+   that is allowed by "r" in a register constraint.
+   Also, registers outside this class are allocated only when
+   instructions express preferences for them.
+
+   The classes must be numbered in nondecreasing order; that is,
+   a larger-numbered class must never be contained completely
+   in a smaller-numbered class.
+
+   For any two classes, it is very desirable that there be another
+   class that represents their union.  */
+
+enum reg_class
+{
+  NO_REGS,	  /* no registers in set  */
+  SIBCALL_REGS,	  /* SIBCALL_REGS  */
+  JIRL_REGS,	  /* JIRL_REGS  */
+  GR_REGS,	  /* integer registers  */
+  CSR_REGS,	  /* integer registers except for $r0 and $r1 for lcsr.  */
+  FP_REGS,	  /* floating point registers  */
+  FCC_REGS,	  /* status registers (fp status)  */
+  FRAME_REGS,	  /* arg pointer and frame pointer  */
+  ALL_REGS,	  /* all registers  */
+  LIM_REG_CLASSES /* max value + 1  */
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+#define GENERAL_REGS GR_REGS
+
+/* An initializer containing the names of the register classes as C
+   string constants.  These names are used in writing some of the
+   debugging dumps.  */
+
+#define REG_CLASS_NAMES							\
+{									\
+  "NO_REGS",								\
+  "SIBCALL_REGS",							\
+  "JIRL_REGS",								\
+  "GR_REGS",								\
+  "CSR_REGS",								\
+  "FP_REGS",								\
+  "FCC_REGS",								\
+  "FRAME_REGS",								\
+  "ALL_REGS"								\
+}
+
+/* An initializer containing the contents of the register classes,
+   as integers which are bit masks.  The Nth integer specifies the
+   contents of class N.  The way the integer MASK is interpreted is
+   that register R is in the class if `MASK & (1 << R)' is 1.
+
+   When the machine has more than 32 registers, an integer does not
+   suffice.  Then the integers are replaced by sub-initializers,
+   braced groupings containing several integers.  Each
+   sub-initializer must be suitable as an initializer for the type
+   `HARD_REG_SET' which is defined in `hard-reg-set.h'.  */
+
+#define REG_CLASS_CONTENTS						\
+{									\
+  { 0x00000000, 0x00000000, 0x00000000 },	/* NO_REGS  */		\
+  { 0x001ff000, 0x00000000, 0x00000000 },	/* SIBCALL_REGS  */	\
+  { 0xff9ffff0, 0x00000000, 0x00000000 },	/* JIRL_REGS  */	\
+  { 0xffffffff, 0x00000000, 0x00000000 },	/* GR_REGS  */		\
+  { 0xfffffffc, 0x00000000, 0x00000000 },	/* CSR_REGS  */		\
+  { 0x00000000, 0xffffffff, 0x00000000 },	/* FP_REGS  */		\
+  { 0x00000000, 0x00000000, 0x000000ff },	/* FCC_REGS  */		\
+  { 0x00000000, 0x00000000, 0x00000300 },	/* FRAME_REGS  */	\
+  { 0xffffffff, 0xffffffff, 0x000003ff }	/* ALL_REGS  */		\
+}
+
+/* A C expression whose value is a register class containing hard
+   register REGNO.  In general there is more that one such class;
+   choose a class which is "minimal", meaning that no smaller class
+   also contains the register.  */
+
+#define REGNO_REG_CLASS(REGNO) loongarch_regno_to_class[(REGNO)]
+
+/* A macro whose definition is the name of the class to which a
+   valid base register must belong.  A base register is one used in
+   an address which is the register value plus a displacement.  */
+
+#define BASE_REG_CLASS (GR_REGS)
+
+/* A macro whose definition is the name of the class to which a
+   valid index register must belong.  An index register is one used
+   in an address where its value is either multiplied by a scale
+   factor or added to another register (as well as added to a
+   displacement).  */
+
+#define INDEX_REG_CLASS GR_REGS
+
+/* We generally want to put call-clobbered registers ahead of
+   call-saved ones.  (IRA expects this.)  */
+
+#define REG_ALLOC_ORDER							\
+{ /* Call-clobbered GPRs.  */						\
+  12, 13, 14, 15, 16, 17, 18, 19, 20, 4, 5, 6, 7, 8, 9, 10, 11, 1,	\
+  /* Call-saved GPRs.  */						\
+  23, 24, 25, 26, 27, 28, 29, 30, 31,					\
+  /* GPRs that can never be exposed to the register allocator.  */	\
+  0, 2, 3, 21, 22, 							\
+  /* Call-clobbered FPRs.  */						\
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,	\
+  48, 49, 50, 51,52, 53, 54, 55, 					\
+  56, 57, 58, 59, 60, 61, 62, 63,					\
+  /* None of the remaining classes have defined call-saved		\
+     registers.  */							\
+  64, 65, 66, 67, 68, 69, 70, 71, 72, 73}
+
+#define IMM_BITS 12
+#define IMM_REACH (1LL << IMM_BITS)
+
+/* True if VALUE is an unsigned 6-bit number.  */
+
+#define UIMM6_OPERAND(VALUE) (((VALUE) & ~(unsigned HOST_WIDE_INT) 0x3f) == 0)
+
+/* True if VALUE is a signed 10-bit number.  */
+
+#define IMM10_OPERAND(VALUE) ((unsigned HOST_WIDE_INT) (VALUE) + 0x200 < 0x400)
+
+/* True if VALUE is a signed 12-bit number.  */
+
+#define IMM12_OPERAND(VALUE) \
+  ((unsigned HOST_WIDE_INT) (VALUE) + IMM_REACH / 2 < IMM_REACH)
+
+/* True if VALUE is a signed 16-bit number.  */
+
+#define IMM16_OPERAND(VALUE) \
+  ((unsigned HOST_WIDE_INT) (VALUE) + 0x8000 < 0x10000)
+
+/* True if VALUE is an unsigned 12-bit number.  */
+
+#define IMM12_OPERAND_UNSIGNED(VALUE) \
+  (((VALUE) & ~(unsigned HOST_WIDE_INT) (IMM_REACH - 1)) == 0)
+
+/* True if VALUE can be loaded into a register using LU12I.  */
+
+#define LU12I_OPERAND(VALUE) \
+  (((VALUE) | ((1UL << 31) - IMM_REACH)) == ((1UL << 31) - IMM_REACH) \
+   || ((VALUE) | ((1UL << 31) - IMM_REACH)) + IMM_REACH == 0)
+
+/* True if VALUE can be loaded into a register using LU32I.  */
+
+#define LU32I_OPERAND(VALUE) \
+  (((VALUE) | (((1ULL << 19) - 1) << 32)) == (((1ULL << 19) - 1) << 32) \
+   || ((VALUE) | (((1ULL << 19) - 1) << 32)) + (1ULL << 32) == 0)
+
+/* True if VALUE can be loaded into a register using LU52I.  */
+
+#define LU52I_OPERAND(VALUE) (((VALUE) | (0xfffULL << 52)) == (0xfffULL << 52))
+
+/* Return a value X with the low 12 bits clear, and such that
+   VALUE - X is a signed 12-bit value.  */
+
+#define CONST_HIGH_PART(VALUE) (((VALUE) + (IMM_REACH / 2)) & ~(IMM_REACH - 1))
+
+#define CONST_LOW_PART(VALUE) ((VALUE) -CONST_HIGH_PART (VALUE))
+
+#define IMM12_INT(X) IMM12_OPERAND (INTVAL (X))
+#define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X))
+#define LU12I_INT(X) LU12I_OPERAND (INTVAL (X))
+#define LU32I_INT(X) LU32I_OPERAND (INTVAL (X))
+#define LU52I_INT(X) LU52I_OPERAND (INTVAL (X))
+#define LARCH_U12BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -2048, 2047))
+#define LARCH_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -256, 255))
+#define LARCH_16BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, -32768, 32767))
+#define LARCH_SHIFT_2_OFFSET_P(OFFSET) (((OFFSET) & 0x3) == 0)
+
+/* Return the maximum number of consecutive registers
+   needed to represent mode MODE in a register of class CLASS.  */
+
+#define CLASS_MAX_NREGS(CLASS, MODE) loongarch_class_max_nregs (CLASS, MODE)
+
+/* Stack layout; function entry, exit and calling.  */
+
+#define STACK_GROWS_DOWNWARD 1
+
+#define FRAME_GROWS_DOWNWARD 1
+
+#define RETURN_ADDR_RTX loongarch_return_addr
+
+/* Similarly, don't use the least-significant bit to tell pointers to
+   code from vtable index.  */
+
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta
+
+#define ELIMINABLE_REGS \
+  { \
+    {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+      {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+      {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+      {FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+  }
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+  (OFFSET) = loongarch_initial_elimination_offset ((FROM), (TO))
+
+/* Allocate stack space for arguments at the beginning of each function.  */
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+/* The argument pointer always points to the first argument.  */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+#define REG_PARM_STACK_SPACE(FNDECL) 0
+
+/* Define this if it is the responsibility of the caller to
+   allocate the area reserved for arguments passed in registers.
+   If `ACCUMULATE_OUTGOING_ARGS' is also defined, the only effect
+   of this macro is to determine whether the space is included in
+   `crtl->outgoing_args_size'.  */
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
+
+#define STACK_BOUNDARY (TARGET_ABI_LP64 ? 128 : 64)
+
+/* Symbolic macros for the registers used to return integer and floating
+   point values.  */
+
+#define GP_RETURN (GP_REG_FIRST + 4)
+#define FP_RETURN ((TARGET_SOFT_FLOAT) ? GP_RETURN : (FP_REG_FIRST + 0))
+
+#define MAX_ARGS_IN_REGISTERS 8
+
+/* Symbolic macros for the first/last argument registers.  */
+
+#define GP_ARG_FIRST (GP_REG_FIRST + 4)
+#define GP_ARG_LAST (GP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1)
+#define FP_ARG_FIRST (FP_REG_FIRST + 0)
+#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1)
+
+/* 1 if N is a possible register number for function argument passing.
+   We have no FP argument registers when soft-float.  */
+
+/* Accept arguments in a0-a7, and in fa0-fa7 if permitted by the ABI.  */
+#define FUNCTION_ARG_REGNO_P(N) \
+  (IN_RANGE ((N), GP_ARG_FIRST, GP_ARG_LAST) \
+   || (UNITS_PER_FP_ARG && IN_RANGE ((N), FP_ARG_FIRST, FP_ARG_LAST)))
+
+/* This structure has to cope with two different argument allocation
+   schemes.  Most LoongArch ABIs view the arguments as a structure, of which
+   the first N words go in registers and the rest go on the stack.  If I
+   < N, the Ith word might go in Ith integer argument register or in a
+   floating-point register.  For these ABIs, we only need to remember
+   the offset of the current argument into the structure.
+
+   So for the standard ABIs, the first N words are allocated to integer
+   registers, and loongarch_function_arg decides on an argument-by-argument
+   basis whether that argument should really go in an integer register,
+   or in a floating-point one.  */
+
+typedef struct loongarch_args
+{
+  /* Number of integer registers used so far, up to MAX_ARGS_IN_REGISTERS.  */
+  unsigned int num_gprs;
+
+  /* Number of floating-point registers used so far, likewise.  */
+  unsigned int num_fprs;
+
+} CUMULATIVE_ARGS;
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+   for a call to a function whose data type is FNTYPE.
+   For a library call, FNTYPE is 0.  */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+  memset (&(CUM), 0, sizeof (CUM))
+
+#define EPILOGUE_USES(REGNO) loongarch_epilogue_uses (REGNO)
+
+/* Treat LOC as a byte offset from the stack pointer and round it up
+   to the next fully-aligned offset.  */
+#define LARCH_STACK_ALIGN(LOC) \
+  (TARGET_ABI_LP64 ? ROUND_UP ((LOC), 16) : ROUND_UP ((LOC), 8))
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+   for profiling a function entry.  */
+
+#define MCOUNT_NAME "_mcount"
+
+/* Emit rtl for profiling.  Output assembler code to FILE
+   to call "_mcount" for profiling a function entry.  */
+#define PROFILE_HOOK(LABEL) \
+  { \
+    rtx fun, ra; \
+    ra = get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM); \
+    fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
+    emit_library_call (fun, LCT_NORMAL, VOIDmode, ra, Pmode); \
+  }
+
+/* All the work done in PROFILE_HOOK, but still required.  */
+#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
+
+#define NO_PROFILE_COUNTERS 1
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+   the stack pointer does not matter.  The value is tested only in
+   functions that have frame pointers.
+   No definition is equivalent to always zero.  */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Trampolines are a block of code followed by two pointers.  */
+
+#define TRAMPOLINE_CODE_SIZE 16
+#define TRAMPOLINE_SIZE \
+  ((Pmode == SImode) ? TRAMPOLINE_CODE_SIZE \
+		     : (TRAMPOLINE_CODE_SIZE + POINTER_SIZE * 2))
+#define TRAMPOLINE_ALIGNMENT POINTER_SIZE
+
+/* loongarch_trampoline_init calls this library function to flush
+   program and data caches.  */
+
+#ifndef CACHE_FLUSH_FUNC
+#define CACHE_FLUSH_FUNC "_flush_cache"
+#endif
+
+/* Addressing modes, and classification of registers for them.  */
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+  loongarch_regno_mode_ok_for_base_p (REGNO, VOIDmode, 1)
+
+#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \
+  loongarch_regno_mode_ok_for_base_p (REGNO, MODE, 1)
+
+/* Maximum number of registers that can appear in a valid memory address.  */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Check for constness inline but use loongarch_legitimate_address_p
+   to check whether a constant really is an address.  */
+
+#define CONSTANT_ADDRESS_P(X) (CONSTANT_P (X) && memory_address_p (SImode, X))
+
+/* This handles the magic '..CURRENT_FUNCTION' symbol, which means
+   'the start of the function that this code is output in'.  */
+
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+  do \
+    { \
+      if (strcmp (NAME, "..CURRENT_FUNCTION") == 0) \
+	asm_fprintf ((FILE), "%U%s", \
+		     XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); \
+      else \
+	asm_fprintf ((FILE), "%U%s", (NAME)); \
+    } \
+  while (0)
+
+#define CASE_VECTOR_MODE Pmode
+
+/* Only use short offsets if their range will not overflow.  */
+#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode
+
+/* Define this as 1 if `char' should by default be signed; else as 0.  */
+#ifndef DEFAULT_SIGNED_CHAR
+#define DEFAULT_SIGNED_CHAR 1
+#endif
+
+/* The SPARC port says:
+   The maximum number of bytes that a single instruction
+   can move quickly between memory and registers or between
+   two memory locations.  */
+#define MOVE_MAX UNITS_PER_WORD
+#define MAX_MOVE_MAX 8
+
+/* The SPARC port says:
+   Nonzero if access to memory by bytes is slow and undesirable.
+   For RISC chips, it means that access to memory by bytes is no
+   better than access by words when possible, so grab a whole word
+   and maybe make use of that.  */
+#define SLOW_BYTE_ACCESS 1
+
+/* Standard LoongArch integer shifts truncate the shift amount to the
+   width of the shifted operand.  */
+#define SHIFT_COUNT_TRUNCATED 1
+
+/* Specify the machine mode that pointers have.
+   After generation of rtl, the compiler makes no further distinction
+   between pointers and any other objects of this machine mode.  */
+
+#ifndef Pmode
+#define Pmode (TARGET_64BIT ? DImode : SImode)
+#endif
+
+/* Give call MEMs SImode since it is the "most permissive" mode
+   for both 32-bit and 64-bit targets.  */
+
+#define FUNCTION_MODE SImode
+
+/* We allocate $fcc registers by hand and can't cope with moves of
+   CCmode registers to and from pseudos (or memory).  */
+#define AVOID_CCMODE_COPIES
+
+/* A C expression for the cost of a branch instruction.  A value of
+   1 is the default; other values are interpreted relative to that.  */
+
+#define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost
+#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
+
+/* Return the asm template for a conditional branch instruction.
+   OPCODE is the opcode's mnemonic and OPERANDS is the asm template for
+   its operands.  */
+#define LARCH_BRANCH(OPCODE, OPERANDS) OPCODE "\t" OPERANDS
+
+/* Control the assembler format that we output.  */
+
+/* Output to assembler file text saying following lines
+   may contain character constants, extra white space, comments, etc.  */
+
+#ifndef ASM_APP_ON
+#define ASM_APP_ON " #APP\n"
+#endif
+
+/* Output to assembler file text saying following lines
+   no longer contain unusual constructs.  */
+
+#ifndef ASM_APP_OFF
+#define ASM_APP_OFF " #NO_APP\n"
+#endif
+
+#define REGISTER_NAMES							  \
+{ "$r0",   "$r1",   "$r2",   "$r3",   "$r4",   "$r5",   "$r6",   "$r7",   \
+  "$r8",   "$r9",   "$r10",  "$r11",  "$r12",  "$r13",  "$r14",  "$r15",  \
+  "$r16",  "$r17",  "$r18",  "$r19",  "$r20",  "$r21",  "$r22",  "$r23",  \
+  "$r24",  "$r25",  "$r26",  "$r27",  "$r28",  "$r29",  "$r30",  "$r31",  \
+  "$f0",  "$f1",  "$f2",  "$f3",  "$f4",  "$f5",  "$f6",  "$f7",	  \
+  "$f8",  "$f9",  "$f10", "$f11", "$f12", "$f13", "$f14", "$f15",	  \
+  "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",	  \
+  "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",	  \
+  "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7",	  \
+  "$arg", "$frame"}
+
+/* This macro defines additional names for hard registers.  */
+
+#define ADDITIONAL_REGISTER_NAMES					\
+{									\
+  { "zero",	 0 + GP_REG_FIRST },					\
+  { "ra",	 1 + GP_REG_FIRST },					\
+  { "tp",	 2 + GP_REG_FIRST },					\
+  { "sp",	 3 + GP_REG_FIRST },					\
+  { "a0",	 4 + GP_REG_FIRST },					\
+  { "a1",	 5 + GP_REG_FIRST },					\
+  { "a2",	 6 + GP_REG_FIRST },					\
+  { "a3",	 7 + GP_REG_FIRST },					\
+  { "a4",	 8 + GP_REG_FIRST },					\
+  { "a5",	 9 + GP_REG_FIRST },					\
+  { "a6",	10 + GP_REG_FIRST },					\
+  { "a7",	11 + GP_REG_FIRST },					\
+  { "t0",	12 + GP_REG_FIRST },					\
+  { "t1",	13 + GP_REG_FIRST },					\
+  { "t2",	14 + GP_REG_FIRST },					\
+  { "t3",	15 + GP_REG_FIRST },					\
+  { "t4",	16 + GP_REG_FIRST },					\
+  { "t5",	17 + GP_REG_FIRST },					\
+  { "t6",	18 + GP_REG_FIRST },					\
+  { "t7",	19 + GP_REG_FIRST },					\
+  { "t8",	20 + GP_REG_FIRST },					\
+  { "x",	21 + GP_REG_FIRST },					\
+  { "fp",	22 + GP_REG_FIRST },					\
+  { "s0",	23 + GP_REG_FIRST },					\
+  { "s1",	24 + GP_REG_FIRST },					\
+  { "s2",	25 + GP_REG_FIRST },					\
+  { "s3",	26 + GP_REG_FIRST },					\
+  { "s4",	27 + GP_REG_FIRST },					\
+  { "s5",	28 + GP_REG_FIRST },					\
+  { "s6",	29 + GP_REG_FIRST },					\
+  { "s7",	30 + GP_REG_FIRST },					\
+  { "s8",	31 + GP_REG_FIRST },					\
+  { "v0",	 4 + GP_REG_FIRST },					\
+  { "v1",	 5 + GP_REG_FIRST }					\
+}
+
+/* The LoongArch implementation uses some labels for its own purpose.  The
+   following lists what labels are created, and are all formed by the
+   pattern $L[a-z].*.  The machine independent portion of GCC creates
+   labels matching:  $L[A-Z][0-9]+ and $L[0-9]+.
+
+	LM[0-9]+	Silicon Graphics/ECOFF stabs label before each stmt.
+	$Lb[0-9]+	Begin blocks for LoongArch debug support
+	$Lc[0-9]+	Label for use in s<xx> operation.
+	$Le[0-9]+	End blocks for LoongArch debug support.  */
+
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \
+  loongarch_declare_object (STREAM, NAME, "", ":\n")
+
+/* Globalizing directive for a label.  */
+#define GLOBAL_ASM_OP "\t.globl\t"
+
+/* This says how to define a global common symbol.  */
+
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON loongarch_output_aligned_decl_common
+
+/* This says how to define a local common symbol (i.e., not visible to
+   linker).  */
+
+#ifndef ASM_OUTPUT_ALIGNED_LOCAL
+#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \
+  loongarch_declare_common_object (STREAM, NAME, "\n\t.lcomm\t", SIZE, ALIGN, \
+				   false)
+#endif
+
+/* This says how to output an external.  It would be possible not to
+   output anything and let undefined symbol become external.  However
+   the assembler uses length information on externals to allocate in
+   data/sdata bss/sbss, thereby saving exec time.  */
+
+#undef ASM_OUTPUT_EXTERNAL
+#define ASM_OUTPUT_EXTERNAL(STREAM, DECL, NAME) \
+  loongarch_output_external (STREAM, DECL, NAME)
+
+/* This is how to declare a function name.  The actual work of
+   emitting the label is moved to function_prologue, so that we can
+   get the line number correctly emitted before the .ent directive,
+   and after any .file directives.  Define as empty so that the function
+   is not declared before the .ent directive elsewhere.  */
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \
+  loongarch_declare_function_name (STREAM, NAME, DECL)
+
+/* This is how to store into the string LABEL
+   the symbol_ref name of an internal numbered label where
+   PREFIX is the class of label and NUM is the number within the class.
+   This is suitable for output with `assemble_name'.  */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
+  sprintf ((LABEL), "*%s%s%ld", (LOCAL_LABEL_PREFIX), (PREFIX), (long) (NUM))
+
+/* Print debug labels as "foo = ." rather than "foo:" because they should
+   represent a byte pointer rather than an ISA-encoded address.  This is
+   particularly important for code like:
+
+	$LFBxxx = .
+		.cfi_startproc
+		...
+		.section .gcc_except_table,...
+		...
+		.uleb128 foo-$LFBxxx
+
+   The .uleb128 requies $LFBxxx to match the FDE start address, which is
+   likewise a byte pointer rather than an ISA-encoded address.
+
+   At the time of writing, this hook is not used for the function end
+   label:
+
+	$LFExxx:
+		.end foo
+
+   */
+
+#define ASM_OUTPUT_DEBUG_LABEL(FILE, PREFIX, NUM) \
+  fprintf (FILE, "%s%s%d = .\n", LOCAL_LABEL_PREFIX, PREFIX, NUM)
+
+/* This is how to output an element of a case-vector that is absolute.  */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
+  fprintf (STREAM, "\t%s\t%sL%d\n", ptr_mode == DImode ? ".dword" : ".word", \
+	   LOCAL_LABEL_PREFIX, VALUE)
+
+/* This is how to output an element of a case-vector.  */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
+  do \
+    { \
+      fprintf (STREAM, "\t%s\t%sL%d-%sL%d\n", \
+	       ptr_mode == DImode ? ".dword" : ".word", LOCAL_LABEL_PREFIX, \
+	       VALUE, LOCAL_LABEL_PREFIX, REL); \
+    } \
+  while (0)
+
+#define JUMP_TABLES_IN_TEXT_SECTION 0
+
+/* This is how to output an assembler line
+   that says to advance the location counter
+   to a multiple of 2**LOG bytes.  */
+
+#define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
+
+/* "nop" instruction 54525952 (andi $r0,$r0,0) is
+   used for padding.  */
+#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
+  fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
+
+/* This is how to output an assembler line to advance the location
+   counter by SIZE bytes.  */
+
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(STREAM, SIZE) \
+  fprintf (STREAM, "\t.space\t" HOST_WIDE_INT_PRINT_UNSIGNED "\n", (SIZE))
+
+/* This is how to output a string.  */
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII loongarch_output_ascii
+
+/* Define the strings to put out for each section in the object file.  */
+#define TEXT_SECTION_ASM_OP "\t.text" /* instructions  */
+#define DATA_SECTION_ASM_OP "\t.data" /* large data  */
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" /* read-only data */
+
+#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO)	\
+  do \
+    { \
+      fprintf (STREAM, "\t%s\t%s,%s,-8\n\t%s\t%s,%s,0\n", \
+	       TARGET_64BIT ? "addi.d" : "addi.w", \
+	       reg_names[STACK_POINTER_REGNUM], \
+	       reg_names[STACK_POINTER_REGNUM], \
+	       TARGET_64BIT ? "st.d" : "st.w", reg_names[REGNO], \
+	       reg_names[STACK_POINTER_REGNUM]); \
+    } \
+  while (0)
+
+#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \
+  do \
+    { \
+      fprintf (STREAM, "\t%s\t%s,%s,0\n\t%s\t%s,%s,8\n", \
+	       TARGET_64BIT ? "ld.d" : "ld.w", reg_names[REGNO], \
+	       reg_names[STACK_POINTER_REGNUM], \
+	       TARGET_64BIT ? "addi.d" : "addi.w", \
+	       reg_names[STACK_POINTER_REGNUM], \
+	       reg_names[STACK_POINTER_REGNUM]); \
+    } \
+  while (0)
+
+/* How to start an assembler comment.
+   The leading space is important (the loongarch native assembler requires it).
+ */
+#ifndef ASM_COMMENT_START
+#define ASM_COMMENT_START " #"
+#endif
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (POINTER_SIZE == 64 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
+
+/* The maximum number of bytes that can be copied by one iteration of
+   a cpymemsi loop; see loongarch_block_move_loop.  */
+#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4)
+
+/* The maximum number of bytes that can be copied by a straight-line
+   implementation of cpymemsi; see loongarch_block_move_straight.  We want
+   to make sure that any loop-based implementation will iterate at
+   least twice.  */
+#define LARCH_MAX_MOVE_BYTES_STRAIGHT (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)
+
+/* The base cost of a memcpy call, for MOVE_RATIO and friends.  These
+   values were determined experimentally by benchmarking with CSiBE.
+*/
+#define LARCH_CALL_RATIO 8
+
+/* Any loop-based implementation of cpymemsi will have at least
+   LARCH_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory
+   moves, so allow individual copies of fewer elements.
+
+   When cpymemsi is not available, use a value approximating
+   the length of a memcpy call sequence, so that move_by_pieces
+   will generate inline code if it is shorter than a function call.
+   Since move_by_pieces_ninsns counts memory-to-memory moves, but
+   we'll have to generate a load/store pair for each, halve the
+   value of LARCH_CALL_RATIO to take that into account.  */
+
+#define MOVE_RATIO(speed) \
+  (HAVE_cpymemsi \
+   ? LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD \
+   : CLEAR_RATIO (speed) / 2)
+
+/* For CLEAR_RATIO, when optimizing for size, give a better estimate
+   of the length of a memset call, but use the default otherwise.  */
+
+#define CLEAR_RATIO(speed) ((speed) ? 15 : LARCH_CALL_RATIO)
+
+/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when
+   optimizing for size adjust the ratio to account for the overhead of
+   loading the constant and replicating it across the word.  */
+
+#define SET_RATIO(speed) ((speed) ? 15 : LARCH_CALL_RATIO - 2)
+
+#ifndef USED_FOR_TARGET
+extern const enum reg_class loongarch_regno_to_class[];
+extern int loongarch_dwarf_regno[];
+
+/* Information about a function's frame layout.  */
+struct GTY (()) loongarch_frame_info
+{
+  /* The size of the frame in bytes.  */
+  HOST_WIDE_INT total_size;
+
+  /* Bit X is set if the function saves or restores GPR X.  */
+  unsigned int mask;
+
+  /* Likewise FPR X.  */
+  unsigned int fmask;
+
+  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
+  unsigned save_libcall_adjustment;
+
+  /* Offsets of fixed-point and floating-point save areas from frame
+     bottom.  */
+  HOST_WIDE_INT gp_sp_offset;
+  HOST_WIDE_INT fp_sp_offset;
+
+  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
+  HOST_WIDE_INT frame_pointer_offset;
+
+  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
+  HOST_WIDE_INT hard_frame_pointer_offset;
+
+  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
+  HOST_WIDE_INT arg_pointer_offset;
+};
+
+struct GTY (()) machine_function
+{
+  /* The next floating-point condition-code register to allocate
+     for 8CC targets, relative to FCC_REG_FIRST.  */
+  unsigned int next_fcc;
+
+  /* The number of extra stack bytes taken up by register varargs.
+     This area is allocated by the callee at the very top of the frame.  */
+  int varargs_size;
+
+  /* The current frame information, calculated by loongarch_compute_frame_info.
+   */
+  struct loongarch_frame_info frame;
+
+  /* True if at least one of the formal parameters to a function must be
+     written to the frame header (probably so its address can be taken).  */
+  bool does_not_use_frame_header;
+
+  /* True if none of the functions that are called by this function need
+     stack space allocated for their arguments.  */
+  bool optimize_call_stack;
+
+  /* True if one of the functions calling this function may not allocate
+     a frame header.  */
+  bool callers_may_not_allocate_frame;
+};
+#endif
+
+/* As on most targets, we want the .eh_frame section to be read-only where
+   possible.  And as on most targets, this means two things:
+
+     (a) Non-locally-binding pointers must have an indirect encoding,
+	 so that the addresses in the .eh_frame section itself become
+	 locally-binding.
+
+     (b) A shared library's .eh_frame section must encode locally-binding
+	 pointers in a relative (relocation-free) form.
+
+   However, LoongArch has traditionally not allowed directives like:
+
+	.long	x-.
+
+   in cases where "x" is in a different section, or is not defined in the
+   same assembly file.  We are therefore unable to emit the PC-relative
+   form required by (b) at assembly time.
+
+   Fortunately, the linker is able to convert absolute addresses into
+   PC-relative addresses on our behalf.  Unfortunately, only certain
+   versions of the linker know how to do this for indirect pointers,
+   and for personality data.  We must fall back on using writable
+   .eh_frame sections for shared libraries if the linker does not
+   support this feature.  */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+  (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr)
+
+/* Several named LoongArch patterns depend on Pmode.  These patterns have the
+   form <NAME>_si for Pmode == SImode and <NAME>_di for Pmode == DImode.
+   Add the appropriate suffix to generator function NAME and invoke it
+   with arguments ARGS.  */
+#define PMODE_INSN(NAME, ARGS) \
+  (Pmode == SImode ? NAME##_si ARGS : NAME##_di ARGS)
+
+/* Do emit .note.GNU-stack by default.  */
+#ifndef NEED_INDICATE_EXEC_STACK
+#define NEED_INDICATE_EXEC_STACK 1
+#endif
+
+/* The `Q' extension is not yet supported.  */
+/* TODO: according to march.  */
+#define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
+
+/* The largest type that can be passed in floating-point registers.  */
+/* TODO: according to mabi.  */
+#define UNITS_PER_FP_ARG  \
+  (TARGET_HARD_FLOAT ? (TARGET_DOUBLE_FLOAT ? 8 : 4) : 0)
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN)