Message ID | 20190822134551.18924-4-iii@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | S/390: Use signaling FP comparison instructions | expand |
Ilya Leoshkevich <iii@linux.ibm.com> writes: > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, > return 0; > } > > +/* can_vector_compare_p presents fake rtx binary operations to the the back-end > + in order to determine its capabilities. In order to avoid creating fake > + operations on each call, values from previous calls are cached in a global > + cached_binops hash_table. It contains rtxes, which can be looked up using > + binop_keys. */ > + > +struct binop_key { > + enum rtx_code code; /* Operation code. */ > + machine_mode value_mode; /* Result mode. */ > + machine_mode cmp_op_mode; /* Operand mode. */ > +}; > + > +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { > + typedef rtx value_type; > + typedef binop_key compare_type; > + > + static hashval_t > + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) > + { > + inchash::hash hstate (0); > + hstate.add_int (code); > + hstate.add_int (value_mode); > + hstate.add_int (cmp_op_mode); > + return hstate.end (); > + } > + > + static hashval_t > + hash (const rtx &ref) > + { > + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); > + } > + > + static bool > + equal (const rtx &ref1, const binop_key &ref2) > + { > + return (GET_CODE (ref1) == ref2.code) > + && (GET_MODE (ref1) == ref2.value_mode) > + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > + } > +}; > + > +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; > + > +static rtx > +get_cached_binop (enum rtx_code code, machine_mode value_mode, > + machine_mode cmp_op_mode) > +{ > + if (!cached_binops) > + cached_binops = hash_table<binop_hasher>::create_ggc (1024); > + binop_key key = { code, value_mode, cmp_op_mode }; > + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > + if (!*slot) > + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > + gen_reg_rtx (cmp_op_mode)); > + return *slot; > +} Sorry, I didn't mean anything this complicated. I just meant that we should have a single cached rtx that we can change via PUT_CODE and PUT_MODE_RAW for each new query, rather than allocating a new rtx each time. Something like: static GTY ((cache)) rtx cached_binop; rtx get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) { if (cached_binop) { PUT_CODE (cached_binop, code); PUT_MODE_RAW (cached_binop, mode); PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); } else { rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); } return cached_binop; } Thanks, Richard
On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford <richard.sandiford@arm.com> wrote: > > Ilya Leoshkevich <iii@linux.ibm.com> writes: > > @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, > > return 0; > > } > > > > +/* can_vector_compare_p presents fake rtx binary operations to the the back-end > > + in order to determine its capabilities. In order to avoid creating fake > > + operations on each call, values from previous calls are cached in a global > > + cached_binops hash_table. It contains rtxes, which can be looked up using > > + binop_keys. */ > > + > > +struct binop_key { > > + enum rtx_code code; /* Operation code. */ > > + machine_mode value_mode; /* Result mode. */ > > + machine_mode cmp_op_mode; /* Operand mode. */ > > +}; > > + > > +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { > > + typedef rtx value_type; > > + typedef binop_key compare_type; > > + > > + static hashval_t > > + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) > > + { > > + inchash::hash hstate (0); > > + hstate.add_int (code); > > + hstate.add_int (value_mode); > > + hstate.add_int (cmp_op_mode); > > + return hstate.end (); > > + } > > + > > + static hashval_t > > + hash (const rtx &ref) > > + { > > + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); > > + } > > + > > + static bool > > + equal (const rtx &ref1, const binop_key &ref2) > > + { > > + return (GET_CODE (ref1) == ref2.code) > > + && (GET_MODE (ref1) == ref2.value_mode) > > + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > > + } > > +}; > > + > > +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; > > + > > +static rtx > > +get_cached_binop (enum rtx_code code, machine_mode value_mode, > > + machine_mode cmp_op_mode) > > +{ > > + if (!cached_binops) > > + cached_binops = hash_table<binop_hasher>::create_ggc (1024); > > + binop_key key = { code, value_mode, cmp_op_mode }; > > + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > > + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > > + if (!*slot) > > + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > > + gen_reg_rtx (cmp_op_mode)); > > + return *slot; > > +} > > Sorry, I didn't mean anything this complicated. I just meant that > we should have a single cached rtx that we can change via PUT_CODE and > PUT_MODE_RAW for each new query, rather than allocating a new rtx each > time. > > Something like: > > static GTY ((cache)) rtx cached_binop; > > rtx > get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > { > if (cached_binop) > { > PUT_CODE (cached_binop, code); > PUT_MODE_RAW (cached_binop, mode); > PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > } > else > { > rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > } > return cached_binop; > } Hmm, maybe we need auto_rtx (code) that constructs such RTX on the stack instead of wasting a GC root (and causing issues for future threading of GCC ;)). Richard. > > Thanks, > Richard
> Am 23.08.2019 um 12:43 schrieb Richard Sandiford <richard.sandiford@arm.com>: > > Ilya Leoshkevich <iii@linux.ibm.com> writes: >> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >> return 0; >> } >> >> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end >> + in order to determine its capabilities. In order to avoid creating fake >> + operations on each call, values from previous calls are cached in a global >> + cached_binops hash_table. It contains rtxes, which can be looked up using >> + binop_keys. */ >> + >> +struct binop_key { >> + enum rtx_code code; /* Operation code. */ >> + machine_mode value_mode; /* Result mode. */ >> + machine_mode cmp_op_mode; /* Operand mode. */ >> +}; >> + >> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { >> + typedef rtx value_type; >> + typedef binop_key compare_type; >> + >> + static hashval_t >> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) >> + { >> + inchash::hash hstate (0); >> + hstate.add_int (code); >> + hstate.add_int (value_mode); >> + hstate.add_int (cmp_op_mode); >> + return hstate.end (); >> + } >> + >> + static hashval_t >> + hash (const rtx &ref) >> + { >> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >> + } >> + >> + static bool >> + equal (const rtx &ref1, const binop_key &ref2) >> + { >> + return (GET_CODE (ref1) == ref2.code) >> + && (GET_MODE (ref1) == ref2.value_mode) >> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >> + } >> +}; >> + >> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; >> + >> +static rtx >> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >> + machine_mode cmp_op_mode) >> +{ >> + if (!cached_binops) >> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); >> + binop_key key = { code, value_mode, cmp_op_mode }; >> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >> + if (!*slot) >> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >> + gen_reg_rtx (cmp_op_mode)); >> + return *slot; >> +} > > Sorry, I didn't mean anything this complicated. I just meant that > we should have a single cached rtx that we can change via PUT_CODE and > PUT_MODE_RAW for each new query, rather than allocating a new rtx each > time. > > Something like: > > static GTY ((cache)) rtx cached_binop; > > rtx > get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > { > if (cached_binop) > { > PUT_CODE (cached_binop, code); > PUT_MODE_RAW (cached_binop, mode); > PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > } > else > { > rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > } > return cached_binop; > } > > Thanks, > Richard Oh, I must have completely missed the point: the cache is only for storage, and stored values themselves don't really matter. To make rtx usable with GTY ((cache)) I had to do: --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -4427,4 +4427,9 @@ extern void gt_ggc_mx (rtx &); extern void gt_pch_nx (rtx &); extern void gt_pch_nx (rtx &, gt_pointer_operator, void *); +inline void +gt_cleare_cache (rtx) +{ +} + #endif /* ! GCC_RTL_H */ Does that look ok? Another think that might turn out being important: in your first suggestion you use if (insn_operand_predicate_fn pred = insn_data[icode].operand[3].predicate) { machine_mode cmp_mode = insn_data[icode].operand[3].mode; instead of simply insn_operand_matches - is there any difference?
> Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>: > > On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > <richard.sandiford@arm.com> wrote: >> >> Ilya Leoshkevich <iii@linux.ibm.com> writes: >>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >>> return 0; >>> } >>> >>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end >>> + in order to determine its capabilities. In order to avoid creating fake >>> + operations on each call, values from previous calls are cached in a global >>> + cached_binops hash_table. It contains rtxes, which can be looked up using >>> + binop_keys. */ >>> + >>> +struct binop_key { >>> + enum rtx_code code; /* Operation code. */ >>> + machine_mode value_mode; /* Result mode. */ >>> + machine_mode cmp_op_mode; /* Operand mode. */ >>> +}; >>> + >>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { >>> + typedef rtx value_type; >>> + typedef binop_key compare_type; >>> + >>> + static hashval_t >>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) >>> + { >>> + inchash::hash hstate (0); >>> + hstate.add_int (code); >>> + hstate.add_int (value_mode); >>> + hstate.add_int (cmp_op_mode); >>> + return hstate.end (); >>> + } >>> + >>> + static hashval_t >>> + hash (const rtx &ref) >>> + { >>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >>> + } >>> + >>> + static bool >>> + equal (const rtx &ref1, const binop_key &ref2) >>> + { >>> + return (GET_CODE (ref1) == ref2.code) >>> + && (GET_MODE (ref1) == ref2.value_mode) >>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>> + } >>> +}; >>> + >>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; >>> + >>> +static rtx >>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>> + machine_mode cmp_op_mode) >>> +{ >>> + if (!cached_binops) >>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); >>> + binop_key key = { code, value_mode, cmp_op_mode }; >>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>> + if (!*slot) >>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >>> + gen_reg_rtx (cmp_op_mode)); >>> + return *slot; >>> +} >> >> Sorry, I didn't mean anything this complicated. I just meant that >> we should have a single cached rtx that we can change via PUT_CODE and >> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >> time. >> >> Something like: >> >> static GTY ((cache)) rtx cached_binop; >> >> rtx >> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >> { >> if (cached_binop) >> { >> PUT_CODE (cached_binop, code); >> PUT_MODE_RAW (cached_binop, mode); >> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >> } >> else >> { >> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >> } >> return cached_binop; >> } > > Hmm, maybe we need auto_rtx (code) that constructs such > RTX on the stack instead of wasting a GC root (and causing > issues for future threading of GCC ;)). Do you mean something like this? union { char raw[rtx_code_size[code]]; rtx rtx; } binop; Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show anything useful), or should I implement this?
Ilya Leoshkevich <iii@linux.ibm.com> writes: >> Am 23.08.2019 um 12:43 schrieb Richard Sandiford <richard.sandiford@arm.com>: >> >> Ilya Leoshkevich <iii@linux.ibm.com> writes: >>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >>> return 0; >>> } >>> >>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end >>> + in order to determine its capabilities. In order to avoid creating fake >>> + operations on each call, values from previous calls are cached in a global >>> + cached_binops hash_table. It contains rtxes, which can be looked up using >>> + binop_keys. */ >>> + >>> +struct binop_key { >>> + enum rtx_code code; /* Operation code. */ >>> + machine_mode value_mode; /* Result mode. */ >>> + machine_mode cmp_op_mode; /* Operand mode. */ >>> +}; >>> + >>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { >>> + typedef rtx value_type; >>> + typedef binop_key compare_type; >>> + >>> + static hashval_t >>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) >>> + { >>> + inchash::hash hstate (0); >>> + hstate.add_int (code); >>> + hstate.add_int (value_mode); >>> + hstate.add_int (cmp_op_mode); >>> + return hstate.end (); >>> + } >>> + >>> + static hashval_t >>> + hash (const rtx &ref) >>> + { >>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >>> + } >>> + >>> + static bool >>> + equal (const rtx &ref1, const binop_key &ref2) >>> + { >>> + return (GET_CODE (ref1) == ref2.code) >>> + && (GET_MODE (ref1) == ref2.value_mode) >>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>> + } >>> +}; >>> + >>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; >>> + >>> +static rtx >>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>> + machine_mode cmp_op_mode) >>> +{ >>> + if (!cached_binops) >>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); >>> + binop_key key = { code, value_mode, cmp_op_mode }; >>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>> + if (!*slot) >>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >>> + gen_reg_rtx (cmp_op_mode)); >>> + return *slot; >>> +} >> >> Sorry, I didn't mean anything this complicated. I just meant that >> we should have a single cached rtx that we can change via PUT_CODE and >> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >> time. >> >> Something like: >> >> static GTY ((cache)) rtx cached_binop; >> >> rtx >> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >> { >> if (cached_binop) >> { >> PUT_CODE (cached_binop, code); >> PUT_MODE_RAW (cached_binop, mode); >> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >> } >> else >> { >> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >> } >> return cached_binop; >> } >> >> Thanks, >> Richard > > Oh, I must have completely missed the point: the cache is only for > storage, and stored values themselves don't really matter. > > To make rtx usable with GTY ((cache)) I had to do: > > --- a/gcc/rtl.h > +++ b/gcc/rtl.h > @@ -4427,4 +4427,9 @@ extern void gt_ggc_mx (rtx &); > extern void gt_pch_nx (rtx &); > extern void gt_pch_nx (rtx &, gt_pointer_operator, void *); > > +inline void > +gt_cleare_cache (rtx) > +{ > +} > + > #endif /* ! GCC_RTL_H */ > > Does that look ok? Ah, turns out I was thinking of "deletable" rather than "cache", sorry. > Another think that might turn out being important: in your first > suggestion you use > > if (insn_operand_predicate_fn pred = insn_data[icode].operand[3].predicate) > { > machine_mode cmp_mode = insn_data[icode].operand[3].mode; > > instead of simply insn_operand_matches - is there any difference? I guess it was premature optimisation: if the .md file doesn't specify a predicate, we don't even need to create the rtx. But since most targets probably do specify a predicate, using insn_matches is fine too. Thanks, Richard
On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > > Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>: > > > > On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > > <richard.sandiford@arm.com> wrote: > >> > >> Ilya Leoshkevich <iii@linux.ibm.com> writes: > >>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, > >>> return 0; > >>> } > >>> > >>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end > >>> + in order to determine its capabilities. In order to avoid creating fake > >>> + operations on each call, values from previous calls are cached in a global > >>> + cached_binops hash_table. It contains rtxes, which can be looked up using > >>> + binop_keys. */ > >>> + > >>> +struct binop_key { > >>> + enum rtx_code code; /* Operation code. */ > >>> + machine_mode value_mode; /* Result mode. */ > >>> + machine_mode cmp_op_mode; /* Operand mode. */ > >>> +}; > >>> + > >>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { > >>> + typedef rtx value_type; > >>> + typedef binop_key compare_type; > >>> + > >>> + static hashval_t > >>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) > >>> + { > >>> + inchash::hash hstate (0); > >>> + hstate.add_int (code); > >>> + hstate.add_int (value_mode); > >>> + hstate.add_int (cmp_op_mode); > >>> + return hstate.end (); > >>> + } > >>> + > >>> + static hashval_t > >>> + hash (const rtx &ref) > >>> + { > >>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); > >>> + } > >>> + > >>> + static bool > >>> + equal (const rtx &ref1, const binop_key &ref2) > >>> + { > >>> + return (GET_CODE (ref1) == ref2.code) > >>> + && (GET_MODE (ref1) == ref2.value_mode) > >>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > >>> + } > >>> +}; > >>> + > >>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; > >>> + > >>> +static rtx > >>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, > >>> + machine_mode cmp_op_mode) > >>> +{ > >>> + if (!cached_binops) > >>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); > >>> + binop_key key = { code, value_mode, cmp_op_mode }; > >>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > >>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > >>> + if (!*slot) > >>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > >>> + gen_reg_rtx (cmp_op_mode)); > >>> + return *slot; > >>> +} > >> > >> Sorry, I didn't mean anything this complicated. I just meant that > >> we should have a single cached rtx that we can change via PUT_CODE and > >> PUT_MODE_RAW for each new query, rather than allocating a new rtx each > >> time. > >> > >> Something like: > >> > >> static GTY ((cache)) rtx cached_binop; > >> > >> rtx > >> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > >> { > >> if (cached_binop) > >> { > >> PUT_CODE (cached_binop, code); > >> PUT_MODE_RAW (cached_binop, mode); > >> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > >> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > >> } > >> else > >> { > >> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > >> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > >> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > >> } > >> return cached_binop; > >> } > > > > Hmm, maybe we need auto_rtx (code) that constructs such > > RTX on the stack instead of wasting a GC root (and causing > > issues for future threading of GCC ;)). > > Do you mean something like this? > > union { > char raw[rtx_code_size[code]]; > rtx rtx; > } binop; > > Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show > anything useful), or should I implement this? It doesn't exist AFAIK, I thought about using alloca like rtx tem; rtx_alloca (tem, PLUS); and due to using alloca rtx_alloca has to be a macro like #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); maybe C++ can help making this prettier but of course since we use alloca we have to avoid opening new scopes. I guess templates like with auto_vec doesn't work unless we can make RTX_CODE_SIZE constant-evaluated. Richard.
> Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guenther@gmail.com>: > > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: >> >>> Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>: >>> >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford >>> <richard.sandiford@arm.com> wrote: >>>> >>>> Ilya Leoshkevich <iii@linux.ibm.com> writes: >>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >>>>> return 0; >>>>> } >>>>> >>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end >>>>> + in order to determine its capabilities. In order to avoid creating fake >>>>> + operations on each call, values from previous calls are cached in a global >>>>> + cached_binops hash_table. It contains rtxes, which can be looked up using >>>>> + binop_keys. */ >>>>> + >>>>> +struct binop_key { >>>>> + enum rtx_code code; /* Operation code. */ >>>>> + machine_mode value_mode; /* Result mode. */ >>>>> + machine_mode cmp_op_mode; /* Operand mode. */ >>>>> +}; >>>>> + >>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { >>>>> + typedef rtx value_type; >>>>> + typedef binop_key compare_type; >>>>> + >>>>> + static hashval_t >>>>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) >>>>> + { >>>>> + inchash::hash hstate (0); >>>>> + hstate.add_int (code); >>>>> + hstate.add_int (value_mode); >>>>> + hstate.add_int (cmp_op_mode); >>>>> + return hstate.end (); >>>>> + } >>>>> + >>>>> + static hashval_t >>>>> + hash (const rtx &ref) >>>>> + { >>>>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >>>>> + } >>>>> + >>>>> + static bool >>>>> + equal (const rtx &ref1, const binop_key &ref2) >>>>> + { >>>>> + return (GET_CODE (ref1) == ref2.code) >>>>> + && (GET_MODE (ref1) == ref2.value_mode) >>>>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>>>> + } >>>>> +}; >>>>> + >>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; >>>>> + >>>>> +static rtx >>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>>>> + machine_mode cmp_op_mode) >>>>> +{ >>>>> + if (!cached_binops) >>>>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); >>>>> + binop_key key = { code, value_mode, cmp_op_mode }; >>>>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>>>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>>>> + if (!*slot) >>>>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >>>>> + gen_reg_rtx (cmp_op_mode)); >>>>> + return *slot; >>>>> +} >>>> >>>> Sorry, I didn't mean anything this complicated. I just meant that >>>> we should have a single cached rtx that we can change via PUT_CODE and >>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >>>> time. >>>> >>>> Something like: >>>> >>>> static GTY ((cache)) rtx cached_binop; >>>> >>>> rtx >>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >>>> { >>>> if (cached_binop) >>>> { >>>> PUT_CODE (cached_binop, code); >>>> PUT_MODE_RAW (cached_binop, mode); >>>> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >>>> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >>>> } >>>> else >>>> { >>>> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >>>> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >>>> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >>>> } >>>> return cached_binop; >>>> } >>> >>> Hmm, maybe we need auto_rtx (code) that constructs such >>> RTX on the stack instead of wasting a GC root (and causing >>> issues for future threading of GCC ;)). >> >> Do you mean something like this? >> >> union { >> char raw[rtx_code_size[code]]; >> rtx rtx; >> } binop; >> >> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show >> anything useful), or should I implement this? > > It doesn't exist AFAIK, I thought about using alloca like > > rtx tem; > rtx_alloca (tem, PLUS); > > and due to using alloca rtx_alloca has to be a macro like > > #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); > memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); > > maybe C++ can help making this prettier but of course > since we use alloca we have to avoid opening new scopes. > > I guess templates like with auto_vec doesn't work unless > we can make RTX_CODE_SIZE constant-evaluated. > > Richard. I ended up with the following change: diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index a667cdab94e..97aa2144e95 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned int regno) set_regno_raw (x, regno, nregs); } -/* Generate a new REG rtx. Make sure ORIGINAL_REGNO is set properly, and +/* Initialize a REG rtx. Make sure ORIGINAL_REGNO is set properly, and don't attempt to share with the various global pieces of rtl (such as frame_pointer_rtx). */ -rtx -gen_raw_REG (machine_mode mode, unsigned int regno) +void +init_raw_REG (rtx x, machine_mode mode, unsigned int regno) { - rtx x = rtx_alloc (REG MEM_STAT_INFO); set_mode_and_regno (x, mode, regno); REG_ATTRS (x) = NULL; ORIGINAL_REGNO (x) = regno; +} + +/* Generate a new REG rtx. */ + +rtx +gen_raw_REG (machine_mode mode, unsigned int regno) +{ + rtx x = rtx_alloc (REG MEM_STAT_INFO); + init_raw_REG (x, mode, regno); return x; } diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c index 5c78fabfb50..bb2087da258 100644 --- a/gcc/gengenrtl.c +++ b/gcc/gengenrtl.c @@ -231,8 +231,7 @@ genmacro (int idx) puts (")"); } -/* Generate the code for the function to generate RTL whose - format is FORMAT. */ +/* Generate the code for functions to generate RTL whose format is FORMAT. */ static void gendef (const char *format) @@ -240,22 +239,18 @@ gendef (const char *format) const char *p; int i, j; - /* Start by writing the definition of the function name and the types + /* Write the definition of the init function name and the types of the arguments. */ - printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); + puts ("static inline void"); + printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format); for (p = format, i = 0; *p != 0; p++) if (*p != '0') printf (",\n\t%sarg%d", type_from_format (*p), i++); + puts (")"); - puts (" MEM_STAT_DECL)"); - - /* Now write out the body of the function itself, which allocates - the memory and initializes it. */ + /* Now write out the body of the init function itself. */ puts ("{"); - puts (" rtx rt;"); - puts (" rt = rtx_alloc (code PASS_MEM_STAT);\n"); - puts (" PUT_MODE_RAW (rt, mode);"); for (p = format, i = j = 0; *p ; ++p, ++i) @@ -266,16 +261,56 @@ gendef (const char *format) else printf (" %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++); - puts ("\n return rt;\n}\n"); + puts ("}\n"); + + /* Write the definition of the gen function name and the types + of the arguments. */ + + puts ("static inline rtx"); + printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); + for (p = format, i = 0; *p != 0; p++) + if (*p != '0') + printf (",\n\t%sarg%d", type_from_format (*p), i++); + puts (" MEM_STAT_DECL)"); + + /* Now write out the body of the function itself, which allocates + the memory and initializes it. */ + puts ("{"); + puts (" rtx rt;\n"); + + puts (" rt = rtx_alloc (code PASS_MEM_STAT);"); + printf (" init_rtx_fmt_%s (rt, mode", format); + for (p = format, i = 0; *p != 0; p++) + if (*p != '0') + printf (", arg%d", i++); + puts (");\n"); + + puts (" return rt;\n}\n"); + + /* Write the definition of gen macro. */ + printf ("#define gen_rtx_fmt_%s(c, m", format); for (p = format, i = 0; *p != 0; p++) if (*p != '0') - printf (", p%i",i++); - printf (")\\\n gen_rtx_fmt_%s_stat (c, m", format); + printf (", arg%d", i++); + printf (") \\\n gen_rtx_fmt_%s_stat ((c), (m)", format); for (p = format, i = 0; *p != 0; p++) if (*p != '0') - printf (", p%i",i++); + printf (", (arg%d)", i++); printf (" MEM_STAT_INFO)\n\n"); + + /* Write the definition of alloca macro. */ + + printf ("#define alloca_rtx_fmt_%s(rt, c, m", format); + for (p = format, i = 0; *p != 0; p++) + if (*p != '0') + printf (", arg%d", i++); + printf (") \\\n rtx_alloca ((rt), (c)); \\\n"); + printf (" init_rtx_fmt_%s ((rt), (m)", format); + for (p = format, i = 0; *p != 0; p++) + if (*p != '0') + printf (", (arg%d)", i++); + printf (")\n\n"); } /* Generate the documentation header for files we write. */ diff --git a/gcc/rtl.h b/gcc/rtl.h index efb9b3ce40d..44733d8a39e 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void); /* In rtl.c */ extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO); +#define rtx_alloca(rt, code) \ + (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \ + memset ((rt), 0, RTX_HDR_SIZE); \ + PUT_CODE ((rt), (code)); extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int); #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ) #define const_wide_int_alloc(NWORDS) \ @@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn, extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT); extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec); extern void set_mode_and_regno (rtx, machine_mode, unsigned int); +extern void init_raw_REG (rtx, machine_mode, unsigned int); extern rtx gen_raw_REG (machine_mode, unsigned int); +#define alloca_raw_REG(rt, mode, regno) \ + rtx_alloca ((rt), REG); \ + init_raw_REG ((rt), (mode), (regno)) extern rtx gen_rtx_REG (machine_mode, unsigned int); extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64); extern rtx gen_rtx_MEM (machine_mode, rtx); which now allows me to write: rtx reg1, reg2, test; alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2); alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2); If that looks ok, I'll resend the series.
On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > > > Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guenther@gmail.com>: > > > > On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > >> > >>> Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>: > >>> > >>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > >>> <richard.sandiford@arm.com> wrote: > >>>> > >>>> Ilya Leoshkevich <iii@linux.ibm.com> writes: > >>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, > >>>>> return 0; > >>>>> } > >>>>> > >>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end > >>>>> + in order to determine its capabilities. In order to avoid creating fake > >>>>> + operations on each call, values from previous calls are cached in a global > >>>>> + cached_binops hash_table. It contains rtxes, which can be looked up using > >>>>> + binop_keys. */ > >>>>> + > >>>>> +struct binop_key { > >>>>> + enum rtx_code code; /* Operation code. */ > >>>>> + machine_mode value_mode; /* Result mode. */ > >>>>> + machine_mode cmp_op_mode; /* Operand mode. */ > >>>>> +}; > >>>>> + > >>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { > >>>>> + typedef rtx value_type; > >>>>> + typedef binop_key compare_type; > >>>>> + > >>>>> + static hashval_t > >>>>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) > >>>>> + { > >>>>> + inchash::hash hstate (0); > >>>>> + hstate.add_int (code); > >>>>> + hstate.add_int (value_mode); > >>>>> + hstate.add_int (cmp_op_mode); > >>>>> + return hstate.end (); > >>>>> + } > >>>>> + > >>>>> + static hashval_t > >>>>> + hash (const rtx &ref) > >>>>> + { > >>>>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); > >>>>> + } > >>>>> + > >>>>> + static bool > >>>>> + equal (const rtx &ref1, const binop_key &ref2) > >>>>> + { > >>>>> + return (GET_CODE (ref1) == ref2.code) > >>>>> + && (GET_MODE (ref1) == ref2.value_mode) > >>>>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > >>>>> + } > >>>>> +}; > >>>>> + > >>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; > >>>>> + > >>>>> +static rtx > >>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, > >>>>> + machine_mode cmp_op_mode) > >>>>> +{ > >>>>> + if (!cached_binops) > >>>>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); > >>>>> + binop_key key = { code, value_mode, cmp_op_mode }; > >>>>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > >>>>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > >>>>> + if (!*slot) > >>>>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > >>>>> + gen_reg_rtx (cmp_op_mode)); > >>>>> + return *slot; > >>>>> +} > >>>> > >>>> Sorry, I didn't mean anything this complicated. I just meant that > >>>> we should have a single cached rtx that we can change via PUT_CODE and > >>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each > >>>> time. > >>>> > >>>> Something like: > >>>> > >>>> static GTY ((cache)) rtx cached_binop; > >>>> > >>>> rtx > >>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > >>>> { > >>>> if (cached_binop) > >>>> { > >>>> PUT_CODE (cached_binop, code); > >>>> PUT_MODE_RAW (cached_binop, mode); > >>>> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > >>>> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > >>>> } > >>>> else > >>>> { > >>>> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > >>>> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > >>>> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > >>>> } > >>>> return cached_binop; > >>>> } > >>> > >>> Hmm, maybe we need auto_rtx (code) that constructs such > >>> RTX on the stack instead of wasting a GC root (and causing > >>> issues for future threading of GCC ;)). > >> > >> Do you mean something like this? > >> > >> union { > >> char raw[rtx_code_size[code]]; > >> rtx rtx; > >> } binop; > >> > >> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show > >> anything useful), or should I implement this? > > > > It doesn't exist AFAIK, I thought about using alloca like > > > > rtx tem; > > rtx_alloca (tem, PLUS); > > > > and due to using alloca rtx_alloca has to be a macro like > > > > #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); > > memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); > > > > maybe C++ can help making this prettier but of course > > since we use alloca we have to avoid opening new scopes. > > > > I guess templates like with auto_vec doesn't work unless > > we can make RTX_CODE_SIZE constant-evaluated. > > > > Richard. > > I ended up with the following change: > > diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c > index a667cdab94e..97aa2144e95 100644 > --- a/gcc/emit-rtl.c > +++ b/gcc/emit-rtl.c > @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned int regno) > set_regno_raw (x, regno, nregs); > } > > -/* Generate a new REG rtx. Make sure ORIGINAL_REGNO is set properly, and > +/* Initialize a REG rtx. Make sure ORIGINAL_REGNO is set properly, and > don't attempt to share with the various global pieces of rtl (such as > frame_pointer_rtx). */ > > -rtx > -gen_raw_REG (machine_mode mode, unsigned int regno) > +void > +init_raw_REG (rtx x, machine_mode mode, unsigned int regno) > { > - rtx x = rtx_alloc (REG MEM_STAT_INFO); > set_mode_and_regno (x, mode, regno); > REG_ATTRS (x) = NULL; > ORIGINAL_REGNO (x) = regno; > +} > + > +/* Generate a new REG rtx. */ > + > +rtx > +gen_raw_REG (machine_mode mode, unsigned int regno) > +{ > + rtx x = rtx_alloc (REG MEM_STAT_INFO); > + init_raw_REG (x, mode, regno); > return x; > } > > diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c > index 5c78fabfb50..bb2087da258 100644 > --- a/gcc/gengenrtl.c > +++ b/gcc/gengenrtl.c > @@ -231,8 +231,7 @@ genmacro (int idx) > puts (")"); > } > > -/* Generate the code for the function to generate RTL whose > - format is FORMAT. */ > +/* Generate the code for functions to generate RTL whose format is FORMAT. */ > > static void > gendef (const char *format) > @@ -240,22 +239,18 @@ gendef (const char *format) > const char *p; > int i, j; > > - /* Start by writing the definition of the function name and the types > + /* Write the definition of the init function name and the types > of the arguments. */ > > - printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); > + puts ("static inline void"); > + printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format); > for (p = format, i = 0; *p != 0; p++) > if (*p != '0') > printf (",\n\t%sarg%d", type_from_format (*p), i++); > + puts (")"); > > - puts (" MEM_STAT_DECL)"); > - > - /* Now write out the body of the function itself, which allocates > - the memory and initializes it. */ > + /* Now write out the body of the init function itself. */ > puts ("{"); > - puts (" rtx rt;"); > - puts (" rt = rtx_alloc (code PASS_MEM_STAT);\n"); > - > puts (" PUT_MODE_RAW (rt, mode);"); > > for (p = format, i = j = 0; *p ; ++p, ++i) > @@ -266,16 +261,56 @@ gendef (const char *format) > else > printf (" %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++); > > - puts ("\n return rt;\n}\n"); > + puts ("}\n"); > + > + /* Write the definition of the gen function name and the types > + of the arguments. */ > + > + puts ("static inline rtx"); > + printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); > + for (p = format, i = 0; *p != 0; p++) > + if (*p != '0') > + printf (",\n\t%sarg%d", type_from_format (*p), i++); > + puts (" MEM_STAT_DECL)"); > + > + /* Now write out the body of the function itself, which allocates > + the memory and initializes it. */ > + puts ("{"); > + puts (" rtx rt;\n"); > + > + puts (" rt = rtx_alloc (code PASS_MEM_STAT);"); > + printf (" init_rtx_fmt_%s (rt, mode", format); > + for (p = format, i = 0; *p != 0; p++) > + if (*p != '0') > + printf (", arg%d", i++); > + puts (");\n"); > + > + puts (" return rt;\n}\n"); > + > + /* Write the definition of gen macro. */ > + > printf ("#define gen_rtx_fmt_%s(c, m", format); > for (p = format, i = 0; *p != 0; p++) > if (*p != '0') > - printf (", p%i",i++); > - printf (")\\\n gen_rtx_fmt_%s_stat (c, m", format); > + printf (", arg%d", i++); > + printf (") \\\n gen_rtx_fmt_%s_stat ((c), (m)", format); > for (p = format, i = 0; *p != 0; p++) > if (*p != '0') > - printf (", p%i",i++); > + printf (", (arg%d)", i++); > printf (" MEM_STAT_INFO)\n\n"); > + > + /* Write the definition of alloca macro. */ > + > + printf ("#define alloca_rtx_fmt_%s(rt, c, m", format); > + for (p = format, i = 0; *p != 0; p++) > + if (*p != '0') > + printf (", arg%d", i++); > + printf (") \\\n rtx_alloca ((rt), (c)); \\\n"); > + printf (" init_rtx_fmt_%s ((rt), (m)", format); > + for (p = format, i = 0; *p != 0; p++) > + if (*p != '0') > + printf (", (arg%d)", i++); > + printf (")\n\n"); > } > > /* Generate the documentation header for files we write. */ > diff --git a/gcc/rtl.h b/gcc/rtl.h > index efb9b3ce40d..44733d8a39e 100644 > --- a/gcc/rtl.h > +++ b/gcc/rtl.h > @@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void); > > /* In rtl.c */ > extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO); > +#define rtx_alloca(rt, code) \ > + (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \ > + memset ((rt), 0, RTX_HDR_SIZE); \ > + PUT_CODE ((rt), (code)); > extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int); > #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ) > #define const_wide_int_alloc(NWORDS) \ > @@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn, > extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT); > extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec); > extern void set_mode_and_regno (rtx, machine_mode, unsigned int); > +extern void init_raw_REG (rtx, machine_mode, unsigned int); > extern rtx gen_raw_REG (machine_mode, unsigned int); > +#define alloca_raw_REG(rt, mode, regno) \ > + rtx_alloca ((rt), REG); \ > + init_raw_REG ((rt), (mode), (regno)) > extern rtx gen_rtx_REG (machine_mode, unsigned int); > extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64); > extern rtx gen_rtx_MEM (machine_mode, rtx); > > which now allows me to write: > > rtx reg1, reg2, test; > alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); > alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2); > alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2); > > If that looks ok, I'll resend the series. that looks OK to me - please leave Richard S. time to comment. Also while I'd like to see rtx reg1 = alloca_raw_REG (cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); I don't really see a way to write that portably (or at all), do you all agree? GCC doesn't seem to convert alloca() calls to __builtin_stack_save/restore nor place CLOBBERs to end their lifetime. But is it guaranteed that the alloca result is valid until frame termination? Thanks, Richard.
> Am 26.08.2019 um 15:06 schrieb Richard Biener <richard.guenther@gmail.com>: > > On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: >> >>> Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guenther@gmail.com>: >>> >>> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: >>>> >>>>> Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>: >>>>> >>>>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford >>>>> <richard.sandiford@arm.com> wrote: >>>>>> >>>>>> Ilya Leoshkevich <iii@linux.ibm.com> writes: >>>>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >>>>>>> return 0; >>>>>>> } >>>>>>> >>>>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end >>>>>>> + in order to determine its capabilities. In order to avoid creating fake >>>>>>> + operations on each call, values from previous calls are cached in a global >>>>>>> + cached_binops hash_table. It contains rtxes, which can be looked up using >>>>>>> + binop_keys. */ >>>>>>> + >>>>>>> +struct binop_key { >>>>>>> + enum rtx_code code; /* Operation code. */ >>>>>>> + machine_mode value_mode; /* Result mode. */ >>>>>>> + machine_mode cmp_op_mode; /* Operand mode. */ >>>>>>> +}; >>>>>>> + >>>>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { >>>>>>> + typedef rtx value_type; >>>>>>> + typedef binop_key compare_type; >>>>>>> + >>>>>>> + static hashval_t >>>>>>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) >>>>>>> + { >>>>>>> + inchash::hash hstate (0); >>>>>>> + hstate.add_int (code); >>>>>>> + hstate.add_int (value_mode); >>>>>>> + hstate.add_int (cmp_op_mode); >>>>>>> + return hstate.end (); >>>>>>> + } >>>>>>> + >>>>>>> + static hashval_t >>>>>>> + hash (const rtx &ref) >>>>>>> + { >>>>>>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >>>>>>> + } >>>>>>> + >>>>>>> + static bool >>>>>>> + equal (const rtx &ref1, const binop_key &ref2) >>>>>>> + { >>>>>>> + return (GET_CODE (ref1) == ref2.code) >>>>>>> + && (GET_MODE (ref1) == ref2.value_mode) >>>>>>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>>>>>> + } >>>>>>> +}; >>>>>>> + >>>>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; >>>>>>> + >>>>>>> +static rtx >>>>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>>>>>> + machine_mode cmp_op_mode) >>>>>>> +{ >>>>>>> + if (!cached_binops) >>>>>>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); >>>>>>> + binop_key key = { code, value_mode, cmp_op_mode }; >>>>>>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>>>>>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>>>>>> + if (!*slot) >>>>>>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >>>>>>> + gen_reg_rtx (cmp_op_mode)); >>>>>>> + return *slot; >>>>>>> +} >>>>>> >>>>>> Sorry, I didn't mean anything this complicated. I just meant that >>>>>> we should have a single cached rtx that we can change via PUT_CODE and >>>>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >>>>>> time. >>>>>> >>>>>> Something like: >>>>>> >>>>>> static GTY ((cache)) rtx cached_binop; >>>>>> >>>>>> rtx >>>>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >>>>>> { >>>>>> if (cached_binop) >>>>>> { >>>>>> PUT_CODE (cached_binop, code); >>>>>> PUT_MODE_RAW (cached_binop, mode); >>>>>> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >>>>>> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >>>>>> } >>>>>> else >>>>>> { >>>>>> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >>>>>> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >>>>>> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >>>>>> } >>>>>> return cached_binop; >>>>>> } >>>>> >>>>> Hmm, maybe we need auto_rtx (code) that constructs such >>>>> RTX on the stack instead of wasting a GC root (and causing >>>>> issues for future threading of GCC ;)). >>>> >>>> Do you mean something like this? >>>> >>>> union { >>>> char raw[rtx_code_size[code]]; >>>> rtx rtx; >>>> } binop; >>>> >>>> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show >>>> anything useful), or should I implement this? >>> >>> It doesn't exist AFAIK, I thought about using alloca like >>> >>> rtx tem; >>> rtx_alloca (tem, PLUS); >>> >>> and due to using alloca rtx_alloca has to be a macro like >>> >>> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); >>> memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); >>> >>> maybe C++ can help making this prettier but of course >>> since we use alloca we have to avoid opening new scopes. >>> >>> I guess templates like with auto_vec doesn't work unless >>> we can make RTX_CODE_SIZE constant-evaluated. >>> >>> Richard. >> >> I ended up with the following change: >> >> diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c >> index a667cdab94e..97aa2144e95 100644 >> --- a/gcc/emit-rtl.c >> +++ b/gcc/emit-rtl.c >> @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned int regno) >> set_regno_raw (x, regno, nregs); >> } >> >> -/* Generate a new REG rtx. Make sure ORIGINAL_REGNO is set properly, and >> +/* Initialize a REG rtx. Make sure ORIGINAL_REGNO is set properly, and >> don't attempt to share with the various global pieces of rtl (such as >> frame_pointer_rtx). */ >> >> -rtx >> -gen_raw_REG (machine_mode mode, unsigned int regno) >> +void >> +init_raw_REG (rtx x, machine_mode mode, unsigned int regno) >> { >> - rtx x = rtx_alloc (REG MEM_STAT_INFO); >> set_mode_and_regno (x, mode, regno); >> REG_ATTRS (x) = NULL; >> ORIGINAL_REGNO (x) = regno; >> +} >> + >> +/* Generate a new REG rtx. */ >> + >> +rtx >> +gen_raw_REG (machine_mode mode, unsigned int regno) >> +{ >> + rtx x = rtx_alloc (REG MEM_STAT_INFO); >> + init_raw_REG (x, mode, regno); >> return x; >> } >> >> diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c >> index 5c78fabfb50..bb2087da258 100644 >> --- a/gcc/gengenrtl.c >> +++ b/gcc/gengenrtl.c >> @@ -231,8 +231,7 @@ genmacro (int idx) >> puts (")"); >> } >> >> -/* Generate the code for the function to generate RTL whose >> - format is FORMAT. */ >> +/* Generate the code for functions to generate RTL whose format is FORMAT. */ >> >> static void >> gendef (const char *format) >> @@ -240,22 +239,18 @@ gendef (const char *format) >> const char *p; >> int i, j; >> >> - /* Start by writing the definition of the function name and the types >> + /* Write the definition of the init function name and the types >> of the arguments. */ >> >> - printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); >> + puts ("static inline void"); >> + printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format); >> for (p = format, i = 0; *p != 0; p++) >> if (*p != '0') >> printf (",\n\t%sarg%d", type_from_format (*p), i++); >> + puts (")"); >> >> - puts (" MEM_STAT_DECL)"); >> - >> - /* Now write out the body of the function itself, which allocates >> - the memory and initializes it. */ >> + /* Now write out the body of the init function itself. */ >> puts ("{"); >> - puts (" rtx rt;"); >> - puts (" rt = rtx_alloc (code PASS_MEM_STAT);\n"); >> - >> puts (" PUT_MODE_RAW (rt, mode);"); >> >> for (p = format, i = j = 0; *p ; ++p, ++i) >> @@ -266,16 +261,56 @@ gendef (const char *format) >> else >> printf (" %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++); >> >> - puts ("\n return rt;\n}\n"); >> + puts ("}\n"); >> + >> + /* Write the definition of the gen function name and the types >> + of the arguments. */ >> + >> + puts ("static inline rtx"); >> + printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); >> + for (p = format, i = 0; *p != 0; p++) >> + if (*p != '0') >> + printf (",\n\t%sarg%d", type_from_format (*p), i++); >> + puts (" MEM_STAT_DECL)"); >> + >> + /* Now write out the body of the function itself, which allocates >> + the memory and initializes it. */ >> + puts ("{"); >> + puts (" rtx rt;\n"); >> + >> + puts (" rt = rtx_alloc (code PASS_MEM_STAT);"); >> + printf (" init_rtx_fmt_%s (rt, mode", format); >> + for (p = format, i = 0; *p != 0; p++) >> + if (*p != '0') >> + printf (", arg%d", i++); >> + puts (");\n"); >> + >> + puts (" return rt;\n}\n"); >> + >> + /* Write the definition of gen macro. */ >> + >> printf ("#define gen_rtx_fmt_%s(c, m", format); >> for (p = format, i = 0; *p != 0; p++) >> if (*p != '0') >> - printf (", p%i",i++); >> - printf (")\\\n gen_rtx_fmt_%s_stat (c, m", format); >> + printf (", arg%d", i++); >> + printf (") \\\n gen_rtx_fmt_%s_stat ((c), (m)", format); >> for (p = format, i = 0; *p != 0; p++) >> if (*p != '0') >> - printf (", p%i",i++); >> + printf (", (arg%d)", i++); >> printf (" MEM_STAT_INFO)\n\n"); >> + >> + /* Write the definition of alloca macro. */ >> + >> + printf ("#define alloca_rtx_fmt_%s(rt, c, m", format); >> + for (p = format, i = 0; *p != 0; p++) >> + if (*p != '0') >> + printf (", arg%d", i++); >> + printf (") \\\n rtx_alloca ((rt), (c)); \\\n"); >> + printf (" init_rtx_fmt_%s ((rt), (m)", format); >> + for (p = format, i = 0; *p != 0; p++) >> + if (*p != '0') >> + printf (", (arg%d)", i++); >> + printf (")\n\n"); >> } >> >> /* Generate the documentation header for files we write. */ >> diff --git a/gcc/rtl.h b/gcc/rtl.h >> index efb9b3ce40d..44733d8a39e 100644 >> --- a/gcc/rtl.h >> +++ b/gcc/rtl.h >> @@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void); >> >> /* In rtl.c */ >> extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO); >> +#define rtx_alloca(rt, code) \ >> + (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \ >> + memset ((rt), 0, RTX_HDR_SIZE); \ >> + PUT_CODE ((rt), (code)); >> extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int); >> #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ) >> #define const_wide_int_alloc(NWORDS) \ >> @@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn, >> extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT); >> extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec); >> extern void set_mode_and_regno (rtx, machine_mode, unsigned int); >> +extern void init_raw_REG (rtx, machine_mode, unsigned int); >> extern rtx gen_raw_REG (machine_mode, unsigned int); >> +#define alloca_raw_REG(rt, mode, regno) \ >> + rtx_alloca ((rt), REG); \ >> + init_raw_REG ((rt), (mode), (regno)) >> extern rtx gen_rtx_REG (machine_mode, unsigned int); >> extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64); >> extern rtx gen_rtx_MEM (machine_mode, rtx); >> >> which now allows me to write: >> >> rtx reg1, reg2, test; >> alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); >> alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2); >> alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2); >> >> If that looks ok, I'll resend the series. > > that looks OK to me - please leave Richard S. time to comment. Also while > I'd like to see > > rtx reg1 = alloca_raw_REG (cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); > > I don't really see a way to write that portably (or at all), do you all agree? > GCC doesn't seem to convert alloca() calls to __builtin_stack_save/restore > nor place CLOBBERs to end their lifetime. But is it guaranteed that the > alloca result is valid until frame termination? Hmm, the alloca man page says: The alloca() function allocates size bytes of space in the stack frame of the caller. This temporary space is automatically freed when the function that called alloca() returns to its caller. ... The space allocated by alloca() is not automatically deallocated if the pointer that refers to it simply goes out of scope. A quick experiment with gcc and clang confirms this. I think this means I can make alloca_raw_REG macro return the allocated pointer using the return-from-block GNU extension.
> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich <iii@linux.ibm.com>: > >> Am 26.08.2019 um 15:06 schrieb Richard Biener <richard.guenther@gmail.com>: >> >> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: >>> >>>> Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guenther@gmail.com>: >>>> >>>> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: >>>>> >>>>>> Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>: >>>>>> >>>>>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford >>>>>> <richard.sandiford@arm.com> wrote: >>>>>>> >>>>>>> Ilya Leoshkevich <iii@linux.ibm.com> writes: >>>>>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >>>>>>>> return 0; >>>>>>>> } >>>>>>>> >>>>>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end >>>>>>>> + in order to determine its capabilities. In order to avoid creating fake >>>>>>>> + operations on each call, values from previous calls are cached in a global >>>>>>>> + cached_binops hash_table. It contains rtxes, which can be looked up using >>>>>>>> + binop_keys. */ >>>>>>>> + >>>>>>>> +struct binop_key { >>>>>>>> + enum rtx_code code; /* Operation code. */ >>>>>>>> + machine_mode value_mode; /* Result mode. */ >>>>>>>> + machine_mode cmp_op_mode; /* Operand mode. */ >>>>>>>> +}; >>>>>>>> + >>>>>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { >>>>>>>> + typedef rtx value_type; >>>>>>>> + typedef binop_key compare_type; >>>>>>>> + >>>>>>>> + static hashval_t >>>>>>>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) >>>>>>>> + { >>>>>>>> + inchash::hash hstate (0); >>>>>>>> + hstate.add_int (code); >>>>>>>> + hstate.add_int (value_mode); >>>>>>>> + hstate.add_int (cmp_op_mode); >>>>>>>> + return hstate.end (); >>>>>>>> + } >>>>>>>> + >>>>>>>> + static hashval_t >>>>>>>> + hash (const rtx &ref) >>>>>>>> + { >>>>>>>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >>>>>>>> + } >>>>>>>> + >>>>>>>> + static bool >>>>>>>> + equal (const rtx &ref1, const binop_key &ref2) >>>>>>>> + { >>>>>>>> + return (GET_CODE (ref1) == ref2.code) >>>>>>>> + && (GET_MODE (ref1) == ref2.value_mode) >>>>>>>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>>>>>>> + } >>>>>>>> +}; >>>>>>>> + >>>>>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; >>>>>>>> + >>>>>>>> +static rtx >>>>>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>>>>>>> + machine_mode cmp_op_mode) >>>>>>>> +{ >>>>>>>> + if (!cached_binops) >>>>>>>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); >>>>>>>> + binop_key key = { code, value_mode, cmp_op_mode }; >>>>>>>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>>>>>>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>>>>>>> + if (!*slot) >>>>>>>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >>>>>>>> + gen_reg_rtx (cmp_op_mode)); >>>>>>>> + return *slot; >>>>>>>> +} >>>>>>> >>>>>>> Sorry, I didn't mean anything this complicated. I just meant that >>>>>>> we should have a single cached rtx that we can change via PUT_CODE and >>>>>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >>>>>>> time. >>>>>>> >>>>>>> Something like: >>>>>>> >>>>>>> static GTY ((cache)) rtx cached_binop; >>>>>>> >>>>>>> rtx >>>>>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >>>>>>> { >>>>>>> if (cached_binop) >>>>>>> { >>>>>>> PUT_CODE (cached_binop, code); >>>>>>> PUT_MODE_RAW (cached_binop, mode); >>>>>>> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >>>>>>> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >>>>>>> } >>>>>>> else >>>>>>> { >>>>>>> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >>>>>>> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >>>>>>> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >>>>>>> } >>>>>>> return cached_binop; >>>>>>> } >>>>>> >>>>>> Hmm, maybe we need auto_rtx (code) that constructs such >>>>>> RTX on the stack instead of wasting a GC root (and causing >>>>>> issues for future threading of GCC ;)). >>>>> >>>>> Do you mean something like this? >>>>> >>>>> union { >>>>> char raw[rtx_code_size[code]]; >>>>> rtx rtx; >>>>> } binop; >>>>> >>>>> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show >>>>> anything useful), or should I implement this? >>>> >>>> It doesn't exist AFAIK, I thought about using alloca like >>>> >>>> rtx tem; >>>> rtx_alloca (tem, PLUS); >>>> >>>> and due to using alloca rtx_alloca has to be a macro like >>>> >>>> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); >>>> memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); >>>> >>>> maybe C++ can help making this prettier but of course >>>> since we use alloca we have to avoid opening new scopes. >>>> >>>> I guess templates like with auto_vec doesn't work unless >>>> we can make RTX_CODE_SIZE constant-evaluated. >>>> >>>> Richard. >>> >>> I ended up with the following change: >>> >>> diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c >>> index a667cdab94e..97aa2144e95 100644 >>> --- a/gcc/emit-rtl.c >>> +++ b/gcc/emit-rtl.c >>> @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned int regno) >>> set_regno_raw (x, regno, nregs); >>> } >>> >>> -/* Generate a new REG rtx. Make sure ORIGINAL_REGNO is set properly, and >>> +/* Initialize a REG rtx. Make sure ORIGINAL_REGNO is set properly, and >>> don't attempt to share with the various global pieces of rtl (such as >>> frame_pointer_rtx). */ >>> >>> -rtx >>> -gen_raw_REG (machine_mode mode, unsigned int regno) >>> +void >>> +init_raw_REG (rtx x, machine_mode mode, unsigned int regno) >>> { >>> - rtx x = rtx_alloc (REG MEM_STAT_INFO); >>> set_mode_and_regno (x, mode, regno); >>> REG_ATTRS (x) = NULL; >>> ORIGINAL_REGNO (x) = regno; >>> +} >>> + >>> +/* Generate a new REG rtx. */ >>> + >>> +rtx >>> +gen_raw_REG (machine_mode mode, unsigned int regno) >>> +{ >>> + rtx x = rtx_alloc (REG MEM_STAT_INFO); >>> + init_raw_REG (x, mode, regno); >>> return x; >>> } >>> >>> diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c >>> index 5c78fabfb50..bb2087da258 100644 >>> --- a/gcc/gengenrtl.c >>> +++ b/gcc/gengenrtl.c >>> @@ -231,8 +231,7 @@ genmacro (int idx) >>> puts (")"); >>> } >>> >>> -/* Generate the code for the function to generate RTL whose >>> - format is FORMAT. */ >>> +/* Generate the code for functions to generate RTL whose format is FORMAT. */ >>> >>> static void >>> gendef (const char *format) >>> @@ -240,22 +239,18 @@ gendef (const char *format) >>> const char *p; >>> int i, j; >>> >>> - /* Start by writing the definition of the function name and the types >>> + /* Write the definition of the init function name and the types >>> of the arguments. */ >>> >>> - printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); >>> + puts ("static inline void"); >>> + printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format); >>> for (p = format, i = 0; *p != 0; p++) >>> if (*p != '0') >>> printf (",\n\t%sarg%d", type_from_format (*p), i++); >>> + puts (")"); >>> >>> - puts (" MEM_STAT_DECL)"); >>> - >>> - /* Now write out the body of the function itself, which allocates >>> - the memory and initializes it. */ >>> + /* Now write out the body of the init function itself. */ >>> puts ("{"); >>> - puts (" rtx rt;"); >>> - puts (" rt = rtx_alloc (code PASS_MEM_STAT);\n"); >>> - >>> puts (" PUT_MODE_RAW (rt, mode);"); >>> >>> for (p = format, i = j = 0; *p ; ++p, ++i) >>> @@ -266,16 +261,56 @@ gendef (const char *format) >>> else >>> printf (" %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++); >>> >>> - puts ("\n return rt;\n}\n"); >>> + puts ("}\n"); >>> + >>> + /* Write the definition of the gen function name and the types >>> + of the arguments. */ >>> + >>> + puts ("static inline rtx"); >>> + printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); >>> + for (p = format, i = 0; *p != 0; p++) >>> + if (*p != '0') >>> + printf (",\n\t%sarg%d", type_from_format (*p), i++); >>> + puts (" MEM_STAT_DECL)"); >>> + >>> + /* Now write out the body of the function itself, which allocates >>> + the memory and initializes it. */ >>> + puts ("{"); >>> + puts (" rtx rt;\n"); >>> + >>> + puts (" rt = rtx_alloc (code PASS_MEM_STAT);"); >>> + printf (" init_rtx_fmt_%s (rt, mode", format); >>> + for (p = format, i = 0; *p != 0; p++) >>> + if (*p != '0') >>> + printf (", arg%d", i++); >>> + puts (");\n"); >>> + >>> + puts (" return rt;\n}\n"); >>> + >>> + /* Write the definition of gen macro. */ >>> + >>> printf ("#define gen_rtx_fmt_%s(c, m", format); >>> for (p = format, i = 0; *p != 0; p++) >>> if (*p != '0') >>> - printf (", p%i",i++); >>> - printf (")\\\n gen_rtx_fmt_%s_stat (c, m", format); >>> + printf (", arg%d", i++); >>> + printf (") \\\n gen_rtx_fmt_%s_stat ((c), (m)", format); >>> for (p = format, i = 0; *p != 0; p++) >>> if (*p != '0') >>> - printf (", p%i",i++); >>> + printf (", (arg%d)", i++); >>> printf (" MEM_STAT_INFO)\n\n"); >>> + >>> + /* Write the definition of alloca macro. */ >>> + >>> + printf ("#define alloca_rtx_fmt_%s(rt, c, m", format); >>> + for (p = format, i = 0; *p != 0; p++) >>> + if (*p != '0') >>> + printf (", arg%d", i++); >>> + printf (") \\\n rtx_alloca ((rt), (c)); \\\n"); >>> + printf (" init_rtx_fmt_%s ((rt), (m)", format); >>> + for (p = format, i = 0; *p != 0; p++) >>> + if (*p != '0') >>> + printf (", (arg%d)", i++); >>> + printf (")\n\n"); >>> } >>> >>> /* Generate the documentation header for files we write. */ >>> diff --git a/gcc/rtl.h b/gcc/rtl.h >>> index efb9b3ce40d..44733d8a39e 100644 >>> --- a/gcc/rtl.h >>> +++ b/gcc/rtl.h >>> @@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void); >>> >>> /* In rtl.c */ >>> extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO); >>> +#define rtx_alloca(rt, code) \ >>> + (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \ >>> + memset ((rt), 0, RTX_HDR_SIZE); \ >>> + PUT_CODE ((rt), (code)); >>> extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int); >>> #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ) >>> #define const_wide_int_alloc(NWORDS) \ >>> @@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn, >>> extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT); >>> extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec); >>> extern void set_mode_and_regno (rtx, machine_mode, unsigned int); >>> +extern void init_raw_REG (rtx, machine_mode, unsigned int); >>> extern rtx gen_raw_REG (machine_mode, unsigned int); >>> +#define alloca_raw_REG(rt, mode, regno) \ >>> + rtx_alloca ((rt), REG); \ >>> + init_raw_REG ((rt), (mode), (regno)) >>> extern rtx gen_rtx_REG (machine_mode, unsigned int); >>> extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64); >>> extern rtx gen_rtx_MEM (machine_mode, rtx); >>> >>> which now allows me to write: >>> >>> rtx reg1, reg2, test; >>> alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); >>> alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2); >>> alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2); >>> >>> If that looks ok, I'll resend the series. >> >> that looks OK to me - please leave Richard S. time to comment. Also while >> I'd like to see >> >> rtx reg1 = alloca_raw_REG (cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); >> >> I don't really see a way to write that portably (or at all), do you all agree? >> GCC doesn't seem to convert alloca() calls to __builtin_stack_save/restore >> nor place CLOBBERs to end their lifetime. But is it guaranteed that the >> alloca result is valid until frame termination? > > Hmm, the alloca man page says: > > The alloca() function allocates size bytes of space in the stack > frame of the caller. This temporary space is automatically freed > when the function that called alloca() returns to its caller. > ... > The space allocated by alloca() is not automatically deallocated if > the pointer that refers to it simply goes out of scope. > > A quick experiment with gcc and clang confirms this. I think this means > I can make alloca_raw_REG macro return the allocated pointer using the > return-from-block GNU extension. What do you think about the following approach? extern rtx rtx_init (rtx, RTX_CODE); #define rtx_alloca(code) \ rtx_init ((rtx) alloca (RTX_CODE_SIZE ((code))), (code)) ... rtx rtx_init (rtx rt, RTX_CODE code) { /* We want to clear everything up to the FLD array. Normally, this is one int, but we don't want to assume that and it isn't very portable anyway; this is. */ memset (rt, 0, RTX_HDR_SIZE); PUT_CODE (rt, code); return rt; } with similar changes to alloca_raw_REG and gengenrtl. This way we don't even need a GNU extension.
Ilya Leoshkevich <iii@linux.ibm.com> writes: >> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich <iii@linux.ibm.com>: >> >>> Am 26.08.2019 um 15:06 schrieb Richard Biener <richard.guenther@gmail.com>: >>> >>> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: >>>> >>>>> Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guenther@gmail.com>: >>>>> >>>>> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: >>>>>> >>>>>>> Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>: >>>>>>> >>>>>>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford >>>>>>> <richard.sandiford@arm.com> wrote: >>>>>>>> >>>>>>>> Ilya Leoshkevich <iii@linux.ibm.com> writes: >>>>>>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, >>>>>>>>> return 0; >>>>>>>>> } >>>>>>>>> >>>>>>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end >>>>>>>>> + in order to determine its capabilities. In order to avoid creating fake >>>>>>>>> + operations on each call, values from previous calls are cached in a global >>>>>>>>> + cached_binops hash_table. It contains rtxes, which can be looked up using >>>>>>>>> + binop_keys. */ >>>>>>>>> + >>>>>>>>> +struct binop_key { >>>>>>>>> + enum rtx_code code; /* Operation code. */ >>>>>>>>> + machine_mode value_mode; /* Result mode. */ >>>>>>>>> + machine_mode cmp_op_mode; /* Operand mode. */ >>>>>>>>> +}; >>>>>>>>> + >>>>>>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { >>>>>>>>> + typedef rtx value_type; >>>>>>>>> + typedef binop_key compare_type; >>>>>>>>> + >>>>>>>>> + static hashval_t >>>>>>>>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) >>>>>>>>> + { >>>>>>>>> + inchash::hash hstate (0); >>>>>>>>> + hstate.add_int (code); >>>>>>>>> + hstate.add_int (value_mode); >>>>>>>>> + hstate.add_int (cmp_op_mode); >>>>>>>>> + return hstate.end (); >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + static hashval_t >>>>>>>>> + hash (const rtx &ref) >>>>>>>>> + { >>>>>>>>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + static bool >>>>>>>>> + equal (const rtx &ref1, const binop_key &ref2) >>>>>>>>> + { >>>>>>>>> + return (GET_CODE (ref1) == ref2.code) >>>>>>>>> + && (GET_MODE (ref1) == ref2.value_mode) >>>>>>>>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); >>>>>>>>> + } >>>>>>>>> +}; >>>>>>>>> + >>>>>>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; >>>>>>>>> + >>>>>>>>> +static rtx >>>>>>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, >>>>>>>>> + machine_mode cmp_op_mode) >>>>>>>>> +{ >>>>>>>>> + if (!cached_binops) >>>>>>>>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); >>>>>>>>> + binop_key key = { code, value_mode, cmp_op_mode }; >>>>>>>>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); >>>>>>>>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); >>>>>>>>> + if (!*slot) >>>>>>>>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), >>>>>>>>> + gen_reg_rtx (cmp_op_mode)); >>>>>>>>> + return *slot; >>>>>>>>> +} >>>>>>>> >>>>>>>> Sorry, I didn't mean anything this complicated. I just meant that >>>>>>>> we should have a single cached rtx that we can change via PUT_CODE and >>>>>>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each >>>>>>>> time. >>>>>>>> >>>>>>>> Something like: >>>>>>>> >>>>>>>> static GTY ((cache)) rtx cached_binop; >>>>>>>> >>>>>>>> rtx >>>>>>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) >>>>>>>> { >>>>>>>> if (cached_binop) >>>>>>>> { >>>>>>>> PUT_CODE (cached_binop, code); >>>>>>>> PUT_MODE_RAW (cached_binop, mode); >>>>>>>> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); >>>>>>>> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); >>>>>>>> } >>>>>>>> else >>>>>>>> { >>>>>>>> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); >>>>>>>> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); >>>>>>>> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); >>>>>>>> } >>>>>>>> return cached_binop; >>>>>>>> } >>>>>>> >>>>>>> Hmm, maybe we need auto_rtx (code) that constructs such >>>>>>> RTX on the stack instead of wasting a GC root (and causing >>>>>>> issues for future threading of GCC ;)). >>>>>> >>>>>> Do you mean something like this? >>>>>> >>>>>> union { >>>>>> char raw[rtx_code_size[code]]; >>>>>> rtx rtx; >>>>>> } binop; >>>>>> >>>>>> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show >>>>>> anything useful), or should I implement this? >>>>> >>>>> It doesn't exist AFAIK, I thought about using alloca like >>>>> >>>>> rtx tem; >>>>> rtx_alloca (tem, PLUS); >>>>> >>>>> and due to using alloca rtx_alloca has to be a macro like >>>>> >>>>> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); >>>>> memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); >>>>> >>>>> maybe C++ can help making this prettier but of course >>>>> since we use alloca we have to avoid opening new scopes. >>>>> >>>>> I guess templates like with auto_vec doesn't work unless >>>>> we can make RTX_CODE_SIZE constant-evaluated. >>>>> >>>>> Richard. >>>> >>>> I ended up with the following change: >>>> >>>> diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c >>>> index a667cdab94e..97aa2144e95 100644 >>>> --- a/gcc/emit-rtl.c >>>> +++ b/gcc/emit-rtl.c >>>> @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned int regno) >>>> set_regno_raw (x, regno, nregs); >>>> } >>>> >>>> -/* Generate a new REG rtx. Make sure ORIGINAL_REGNO is set properly, and >>>> +/* Initialize a REG rtx. Make sure ORIGINAL_REGNO is set properly, and >>>> don't attempt to share with the various global pieces of rtl (such as >>>> frame_pointer_rtx). */ >>>> >>>> -rtx >>>> -gen_raw_REG (machine_mode mode, unsigned int regno) >>>> +void >>>> +init_raw_REG (rtx x, machine_mode mode, unsigned int regno) >>>> { >>>> - rtx x = rtx_alloc (REG MEM_STAT_INFO); >>>> set_mode_and_regno (x, mode, regno); >>>> REG_ATTRS (x) = NULL; >>>> ORIGINAL_REGNO (x) = regno; >>>> +} >>>> + >>>> +/* Generate a new REG rtx. */ >>>> + >>>> +rtx >>>> +gen_raw_REG (machine_mode mode, unsigned int regno) >>>> +{ >>>> + rtx x = rtx_alloc (REG MEM_STAT_INFO); >>>> + init_raw_REG (x, mode, regno); >>>> return x; >>>> } >>>> >>>> diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c >>>> index 5c78fabfb50..bb2087da258 100644 >>>> --- a/gcc/gengenrtl.c >>>> +++ b/gcc/gengenrtl.c >>>> @@ -231,8 +231,7 @@ genmacro (int idx) >>>> puts (")"); >>>> } >>>> >>>> -/* Generate the code for the function to generate RTL whose >>>> - format is FORMAT. */ >>>> +/* Generate the code for functions to generate RTL whose format is FORMAT. */ >>>> >>>> static void >>>> gendef (const char *format) >>>> @@ -240,22 +239,18 @@ gendef (const char *format) >>>> const char *p; >>>> int i, j; >>>> >>>> - /* Start by writing the definition of the function name and the types >>>> + /* Write the definition of the init function name and the types >>>> of the arguments. */ >>>> >>>> - printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); >>>> + puts ("static inline void"); >>>> + printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format); >>>> for (p = format, i = 0; *p != 0; p++) >>>> if (*p != '0') >>>> printf (",\n\t%sarg%d", type_from_format (*p), i++); >>>> + puts (")"); >>>> >>>> - puts (" MEM_STAT_DECL)"); >>>> - >>>> - /* Now write out the body of the function itself, which allocates >>>> - the memory and initializes it. */ >>>> + /* Now write out the body of the init function itself. */ >>>> puts ("{"); >>>> - puts (" rtx rt;"); >>>> - puts (" rt = rtx_alloc (code PASS_MEM_STAT);\n"); >>>> - >>>> puts (" PUT_MODE_RAW (rt, mode);"); >>>> >>>> for (p = format, i = j = 0; *p ; ++p, ++i) >>>> @@ -266,16 +261,56 @@ gendef (const char *format) >>>> else >>>> printf (" %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++); >>>> >>>> - puts ("\n return rt;\n}\n"); >>>> + puts ("}\n"); >>>> + >>>> + /* Write the definition of the gen function name and the types >>>> + of the arguments. */ >>>> + >>>> + puts ("static inline rtx"); >>>> + printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); >>>> + for (p = format, i = 0; *p != 0; p++) >>>> + if (*p != '0') >>>> + printf (",\n\t%sarg%d", type_from_format (*p), i++); >>>> + puts (" MEM_STAT_DECL)"); >>>> + >>>> + /* Now write out the body of the function itself, which allocates >>>> + the memory and initializes it. */ >>>> + puts ("{"); >>>> + puts (" rtx rt;\n"); >>>> + >>>> + puts (" rt = rtx_alloc (code PASS_MEM_STAT);"); >>>> + printf (" init_rtx_fmt_%s (rt, mode", format); >>>> + for (p = format, i = 0; *p != 0; p++) >>>> + if (*p != '0') >>>> + printf (", arg%d", i++); >>>> + puts (");\n"); >>>> + >>>> + puts (" return rt;\n}\n"); >>>> + >>>> + /* Write the definition of gen macro. */ >>>> + >>>> printf ("#define gen_rtx_fmt_%s(c, m", format); >>>> for (p = format, i = 0; *p != 0; p++) >>>> if (*p != '0') >>>> - printf (", p%i",i++); >>>> - printf (")\\\n gen_rtx_fmt_%s_stat (c, m", format); >>>> + printf (", arg%d", i++); >>>> + printf (") \\\n gen_rtx_fmt_%s_stat ((c), (m)", format); >>>> for (p = format, i = 0; *p != 0; p++) >>>> if (*p != '0') >>>> - printf (", p%i",i++); >>>> + printf (", (arg%d)", i++); >>>> printf (" MEM_STAT_INFO)\n\n"); >>>> + >>>> + /* Write the definition of alloca macro. */ >>>> + >>>> + printf ("#define alloca_rtx_fmt_%s(rt, c, m", format); >>>> + for (p = format, i = 0; *p != 0; p++) >>>> + if (*p != '0') >>>> + printf (", arg%d", i++); >>>> + printf (") \\\n rtx_alloca ((rt), (c)); \\\n"); >>>> + printf (" init_rtx_fmt_%s ((rt), (m)", format); >>>> + for (p = format, i = 0; *p != 0; p++) >>>> + if (*p != '0') >>>> + printf (", (arg%d)", i++); >>>> + printf (")\n\n"); >>>> } >>>> >>>> /* Generate the documentation header for files we write. */ >>>> diff --git a/gcc/rtl.h b/gcc/rtl.h >>>> index efb9b3ce40d..44733d8a39e 100644 >>>> --- a/gcc/rtl.h >>>> +++ b/gcc/rtl.h >>>> @@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void); >>>> >>>> /* In rtl.c */ >>>> extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO); >>>> +#define rtx_alloca(rt, code) \ >>>> + (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \ >>>> + memset ((rt), 0, RTX_HDR_SIZE); \ >>>> + PUT_CODE ((rt), (code)); >>>> extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int); >>>> #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ) >>>> #define const_wide_int_alloc(NWORDS) \ >>>> @@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn, >>>> extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT); >>>> extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec); >>>> extern void set_mode_and_regno (rtx, machine_mode, unsigned int); >>>> +extern void init_raw_REG (rtx, machine_mode, unsigned int); >>>> extern rtx gen_raw_REG (machine_mode, unsigned int); >>>> +#define alloca_raw_REG(rt, mode, regno) \ >>>> + rtx_alloca ((rt), REG); \ >>>> + init_raw_REG ((rt), (mode), (regno)) >>>> extern rtx gen_rtx_REG (machine_mode, unsigned int); >>>> extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64); >>>> extern rtx gen_rtx_MEM (machine_mode, rtx); >>>> >>>> which now allows me to write: >>>> >>>> rtx reg1, reg2, test; >>>> alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); >>>> alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2); >>>> alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2); >>>> >>>> If that looks ok, I'll resend the series. >>> >>> that looks OK to me - please leave Richard S. time to comment. Also while >>> I'd like to see >>> >>> rtx reg1 = alloca_raw_REG (cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); >>> >>> I don't really see a way to write that portably (or at all), do you all agree? >>> GCC doesn't seem to convert alloca() calls to __builtin_stack_save/restore >>> nor place CLOBBERs to end their lifetime. But is it guaranteed that the >>> alloca result is valid until frame termination? >> >> Hmm, the alloca man page says: >> >> The alloca() function allocates size bytes of space in the stack >> frame of the caller. This temporary space is automatically freed >> when the function that called alloca() returns to its caller. >> ... >> The space allocated by alloca() is not automatically deallocated if >> the pointer that refers to it simply goes out of scope. >> >> A quick experiment with gcc and clang confirms this. I think this means >> I can make alloca_raw_REG macro return the allocated pointer using the >> return-from-block GNU extension. > > What do you think about the following approach? > > extern rtx rtx_init (rtx, RTX_CODE); > #define rtx_alloca(code) \ > rtx_init ((rtx) alloca (RTX_CODE_SIZE ((code))), (code)) > > ... > > rtx > rtx_init (rtx rt, RTX_CODE code) > { > /* We want to clear everything up to the FLD array. Normally, this > is one int, but we don't want to assume that and it isn't very > portable anyway; this is. */ > memset (rt, 0, RTX_HDR_SIZE); Might as well remove this comment while you're there. RTX_HDR_SIZE has long ceased to be sizeof (int) for any host. > PUT_CODE (rt, code); > return rt; > } > > with similar changes to alloca_raw_REG and gengenrtl. This way we don't > even need a GNU extension. Sounds good to me, but rtx_init should probably be an inline function. Thanks, Richard
On Tue, Aug 27, 2019 at 9:01 AM Richard Sandiford <richard.sandiford@arm.com> wrote: > > Ilya Leoshkevich <iii@linux.ibm.com> writes: > >> Am 26.08.2019 um 15:17 schrieb Ilya Leoshkevich <iii@linux.ibm.com>: > >> > >>> Am 26.08.2019 um 15:06 schrieb Richard Biener <richard.guenther@gmail.com>: > >>> > >>> On Mon, Aug 26, 2019 at 1:54 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > >>>> > >>>>> Am 26.08.2019 um 10:49 schrieb Richard Biener <richard.guenther@gmail.com>: > >>>>> > >>>>> On Fri, Aug 23, 2019 at 1:35 PM Ilya Leoshkevich <iii@linux.ibm.com> wrote: > >>>>>> > >>>>>>> Am 23.08.2019 um 13:24 schrieb Richard Biener <richard.guenther@gmail.com>: > >>>>>>> > >>>>>>> On Fri, Aug 23, 2019 at 12:43 PM Richard Sandiford > >>>>>>> <richard.sandiford@arm.com> wrote: > >>>>>>>> > >>>>>>>> Ilya Leoshkevich <iii@linux.ibm.com> writes: > >>>>>>>>> @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, > >>>>>>>>> return 0; > >>>>>>>>> } > >>>>>>>>> > >>>>>>>>> +/* can_vector_compare_p presents fake rtx binary operations to the the back-end > >>>>>>>>> + in order to determine its capabilities. In order to avoid creating fake > >>>>>>>>> + operations on each call, values from previous calls are cached in a global > >>>>>>>>> + cached_binops hash_table. It contains rtxes, which can be looked up using > >>>>>>>>> + binop_keys. */ > >>>>>>>>> + > >>>>>>>>> +struct binop_key { > >>>>>>>>> + enum rtx_code code; /* Operation code. */ > >>>>>>>>> + machine_mode value_mode; /* Result mode. */ > >>>>>>>>> + machine_mode cmp_op_mode; /* Operand mode. */ > >>>>>>>>> +}; > >>>>>>>>> + > >>>>>>>>> +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { > >>>>>>>>> + typedef rtx value_type; > >>>>>>>>> + typedef binop_key compare_type; > >>>>>>>>> + > >>>>>>>>> + static hashval_t > >>>>>>>>> + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) > >>>>>>>>> + { > >>>>>>>>> + inchash::hash hstate (0); > >>>>>>>>> + hstate.add_int (code); > >>>>>>>>> + hstate.add_int (value_mode); > >>>>>>>>> + hstate.add_int (cmp_op_mode); > >>>>>>>>> + return hstate.end (); > >>>>>>>>> + } > >>>>>>>>> + > >>>>>>>>> + static hashval_t > >>>>>>>>> + hash (const rtx &ref) > >>>>>>>>> + { > >>>>>>>>> + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); > >>>>>>>>> + } > >>>>>>>>> + > >>>>>>>>> + static bool > >>>>>>>>> + equal (const rtx &ref1, const binop_key &ref2) > >>>>>>>>> + { > >>>>>>>>> + return (GET_CODE (ref1) == ref2.code) > >>>>>>>>> + && (GET_MODE (ref1) == ref2.value_mode) > >>>>>>>>> + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); > >>>>>>>>> + } > >>>>>>>>> +}; > >>>>>>>>> + > >>>>>>>>> +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; > >>>>>>>>> + > >>>>>>>>> +static rtx > >>>>>>>>> +get_cached_binop (enum rtx_code code, machine_mode value_mode, > >>>>>>>>> + machine_mode cmp_op_mode) > >>>>>>>>> +{ > >>>>>>>>> + if (!cached_binops) > >>>>>>>>> + cached_binops = hash_table<binop_hasher>::create_ggc (1024); > >>>>>>>>> + binop_key key = { code, value_mode, cmp_op_mode }; > >>>>>>>>> + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); > >>>>>>>>> + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); > >>>>>>>>> + if (!*slot) > >>>>>>>>> + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), > >>>>>>>>> + gen_reg_rtx (cmp_op_mode)); > >>>>>>>>> + return *slot; > >>>>>>>>> +} > >>>>>>>> > >>>>>>>> Sorry, I didn't mean anything this complicated. I just meant that > >>>>>>>> we should have a single cached rtx that we can change via PUT_CODE and > >>>>>>>> PUT_MODE_RAW for each new query, rather than allocating a new rtx each > >>>>>>>> time. > >>>>>>>> > >>>>>>>> Something like: > >>>>>>>> > >>>>>>>> static GTY ((cache)) rtx cached_binop; > >>>>>>>> > >>>>>>>> rtx > >>>>>>>> get_cached_binop (machine_mode mode, rtx_code code, machine_mode op_mode) > >>>>>>>> { > >>>>>>>> if (cached_binop) > >>>>>>>> { > >>>>>>>> PUT_CODE (cached_binop, code); > >>>>>>>> PUT_MODE_RAW (cached_binop, mode); > >>>>>>>> PUT_MODE_RAW (XEXP (cached_binop, 0), op_mode); > >>>>>>>> PUT_MODE_RAW (XEXP (cached_binop, 1), op_mode); > >>>>>>>> } > >>>>>>>> else > >>>>>>>> { > >>>>>>>> rtx reg1 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1); > >>>>>>>> rtx reg2 = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 2); > >>>>>>>> cached_binop = gen_rtx_fmt_ee (code, mode, reg1, reg2); > >>>>>>>> } > >>>>>>>> return cached_binop; > >>>>>>>> } > >>>>>>> > >>>>>>> Hmm, maybe we need auto_rtx (code) that constructs such > >>>>>>> RTX on the stack instead of wasting a GC root (and causing > >>>>>>> issues for future threading of GCC ;)). > >>>>>> > >>>>>> Do you mean something like this? > >>>>>> > >>>>>> union { > >>>>>> char raw[rtx_code_size[code]]; > >>>>>> rtx rtx; > >>>>>> } binop; > >>>>>> > >>>>>> Does this exist already (git grep auto.*rtx / rtx.*auto doesn't show > >>>>>> anything useful), or should I implement this? > >>>>> > >>>>> It doesn't exist AFAIK, I thought about using alloca like > >>>>> > >>>>> rtx tem; > >>>>> rtx_alloca (tem, PLUS); > >>>>> > >>>>> and due to using alloca rtx_alloca has to be a macro like > >>>>> > >>>>> #define rtx_alloca(r, code) r = (rtx)alloca (RTX_CODE_SIZE(code)); > >>>>> memset (r, 0, RTX_HDR_SIZE); PUT_CODE (r, code); > >>>>> > >>>>> maybe C++ can help making this prettier but of course > >>>>> since we use alloca we have to avoid opening new scopes. > >>>>> > >>>>> I guess templates like with auto_vec doesn't work unless > >>>>> we can make RTX_CODE_SIZE constant-evaluated. > >>>>> > >>>>> Richard. > >>>> > >>>> I ended up with the following change: > >>>> > >>>> diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c > >>>> index a667cdab94e..97aa2144e95 100644 > >>>> --- a/gcc/emit-rtl.c > >>>> +++ b/gcc/emit-rtl.c > >>>> @@ -466,17 +466,25 @@ set_mode_and_regno (rtx x, machine_mode mode, unsigned int regno) > >>>> set_regno_raw (x, regno, nregs); > >>>> } > >>>> > >>>> -/* Generate a new REG rtx. Make sure ORIGINAL_REGNO is set properly, and > >>>> +/* Initialize a REG rtx. Make sure ORIGINAL_REGNO is set properly, and > >>>> don't attempt to share with the various global pieces of rtl (such as > >>>> frame_pointer_rtx). */ > >>>> > >>>> -rtx > >>>> -gen_raw_REG (machine_mode mode, unsigned int regno) > >>>> +void > >>>> +init_raw_REG (rtx x, machine_mode mode, unsigned int regno) > >>>> { > >>>> - rtx x = rtx_alloc (REG MEM_STAT_INFO); > >>>> set_mode_and_regno (x, mode, regno); > >>>> REG_ATTRS (x) = NULL; > >>>> ORIGINAL_REGNO (x) = regno; > >>>> +} > >>>> + > >>>> +/* Generate a new REG rtx. */ > >>>> + > >>>> +rtx > >>>> +gen_raw_REG (machine_mode mode, unsigned int regno) > >>>> +{ > >>>> + rtx x = rtx_alloc (REG MEM_STAT_INFO); > >>>> + init_raw_REG (x, mode, regno); > >>>> return x; > >>>> } > >>>> > >>>> diff --git a/gcc/gengenrtl.c b/gcc/gengenrtl.c > >>>> index 5c78fabfb50..bb2087da258 100644 > >>>> --- a/gcc/gengenrtl.c > >>>> +++ b/gcc/gengenrtl.c > >>>> @@ -231,8 +231,7 @@ genmacro (int idx) > >>>> puts (")"); > >>>> } > >>>> > >>>> -/* Generate the code for the function to generate RTL whose > >>>> - format is FORMAT. */ > >>>> +/* Generate the code for functions to generate RTL whose format is FORMAT. */ > >>>> > >>>> static void > >>>> gendef (const char *format) > >>>> @@ -240,22 +239,18 @@ gendef (const char *format) > >>>> const char *p; > >>>> int i, j; > >>>> > >>>> - /* Start by writing the definition of the function name and the types > >>>> + /* Write the definition of the init function name and the types > >>>> of the arguments. */ > >>>> > >>>> - printf ("static inline rtx\ngen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); > >>>> + puts ("static inline void"); > >>>> + printf ("init_rtx_fmt_%s (rtx rt, machine_mode mode", format); > >>>> for (p = format, i = 0; *p != 0; p++) > >>>> if (*p != '0') > >>>> printf (",\n\t%sarg%d", type_from_format (*p), i++); > >>>> + puts (")"); > >>>> > >>>> - puts (" MEM_STAT_DECL)"); > >>>> - > >>>> - /* Now write out the body of the function itself, which allocates > >>>> - the memory and initializes it. */ > >>>> + /* Now write out the body of the init function itself. */ > >>>> puts ("{"); > >>>> - puts (" rtx rt;"); > >>>> - puts (" rt = rtx_alloc (code PASS_MEM_STAT);\n"); > >>>> - > >>>> puts (" PUT_MODE_RAW (rt, mode);"); > >>>> > >>>> for (p = format, i = j = 0; *p ; ++p, ++i) > >>>> @@ -266,16 +261,56 @@ gendef (const char *format) > >>>> else > >>>> printf (" %s (rt, %d) = arg%d;\n", accessor_from_format (*p), i, j++); > >>>> > >>>> - puts ("\n return rt;\n}\n"); > >>>> + puts ("}\n"); > >>>> + > >>>> + /* Write the definition of the gen function name and the types > >>>> + of the arguments. */ > >>>> + > >>>> + puts ("static inline rtx"); > >>>> + printf ("gen_rtx_fmt_%s_stat (RTX_CODE code, machine_mode mode", format); > >>>> + for (p = format, i = 0; *p != 0; p++) > >>>> + if (*p != '0') > >>>> + printf (",\n\t%sarg%d", type_from_format (*p), i++); > >>>> + puts (" MEM_STAT_DECL)"); > >>>> + > >>>> + /* Now write out the body of the function itself, which allocates > >>>> + the memory and initializes it. */ > >>>> + puts ("{"); > >>>> + puts (" rtx rt;\n"); > >>>> + > >>>> + puts (" rt = rtx_alloc (code PASS_MEM_STAT);"); > >>>> + printf (" init_rtx_fmt_%s (rt, mode", format); > >>>> + for (p = format, i = 0; *p != 0; p++) > >>>> + if (*p != '0') > >>>> + printf (", arg%d", i++); > >>>> + puts (");\n"); > >>>> + > >>>> + puts (" return rt;\n}\n"); > >>>> + > >>>> + /* Write the definition of gen macro. */ > >>>> + > >>>> printf ("#define gen_rtx_fmt_%s(c, m", format); > >>>> for (p = format, i = 0; *p != 0; p++) > >>>> if (*p != '0') > >>>> - printf (", p%i",i++); > >>>> - printf (")\\\n gen_rtx_fmt_%s_stat (c, m", format); > >>>> + printf (", arg%d", i++); > >>>> + printf (") \\\n gen_rtx_fmt_%s_stat ((c), (m)", format); > >>>> for (p = format, i = 0; *p != 0; p++) > >>>> if (*p != '0') > >>>> - printf (", p%i",i++); > >>>> + printf (", (arg%d)", i++); > >>>> printf (" MEM_STAT_INFO)\n\n"); > >>>> + > >>>> + /* Write the definition of alloca macro. */ > >>>> + > >>>> + printf ("#define alloca_rtx_fmt_%s(rt, c, m", format); > >>>> + for (p = format, i = 0; *p != 0; p++) > >>>> + if (*p != '0') > >>>> + printf (", arg%d", i++); > >>>> + printf (") \\\n rtx_alloca ((rt), (c)); \\\n"); > >>>> + printf (" init_rtx_fmt_%s ((rt), (m)", format); > >>>> + for (p = format, i = 0; *p != 0; p++) > >>>> + if (*p != '0') > >>>> + printf (", (arg%d)", i++); > >>>> + printf (")\n\n"); > >>>> } > >>>> > >>>> /* Generate the documentation header for files we write. */ > >>>> diff --git a/gcc/rtl.h b/gcc/rtl.h > >>>> index efb9b3ce40d..44733d8a39e 100644 > >>>> --- a/gcc/rtl.h > >>>> +++ b/gcc/rtl.h > >>>> @@ -2933,6 +2933,10 @@ extern HOST_WIDE_INT get_stack_check_protect (void); > >>>> > >>>> /* In rtl.c */ > >>>> extern rtx rtx_alloc (RTX_CODE CXX_MEM_STAT_INFO); > >>>> +#define rtx_alloca(rt, code) \ > >>>> + (rt) = (rtx) alloca (RTX_CODE_SIZE ((code))); \ > >>>> + memset ((rt), 0, RTX_HDR_SIZE); \ > >>>> + PUT_CODE ((rt), (code)); > >>>> extern rtx rtx_alloc_stat_v (RTX_CODE MEM_STAT_DECL, int); > >>>> #define rtx_alloc_v(c, SZ) rtx_alloc_stat_v (c MEM_STAT_INFO, SZ) > >>>> #define const_wide_int_alloc(NWORDS) \ > >>>> @@ -3797,7 +3801,11 @@ gen_rtx_INSN (machine_mode mode, rtx_insn *prev_insn, rtx_insn *next_insn, > >>>> extern rtx gen_rtx_CONST_INT (machine_mode, HOST_WIDE_INT); > >>>> extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec); > >>>> extern void set_mode_and_regno (rtx, machine_mode, unsigned int); > >>>> +extern void init_raw_REG (rtx, machine_mode, unsigned int); > >>>> extern rtx gen_raw_REG (machine_mode, unsigned int); > >>>> +#define alloca_raw_REG(rt, mode, regno) \ > >>>> + rtx_alloca ((rt), REG); \ > >>>> + init_raw_REG ((rt), (mode), (regno)) > >>>> extern rtx gen_rtx_REG (machine_mode, unsigned int); > >>>> extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64); > >>>> extern rtx gen_rtx_MEM (machine_mode, rtx); > >>>> > >>>> which now allows me to write: > >>>> > >>>> rtx reg1, reg2, test; > >>>> alloca_raw_REG (reg1, cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); > >>>> alloca_raw_REG (reg2, cmp_op_mode, LAST_VIRTUAL_REGISTER + 2); > >>>> alloca_rtx_fmt_ee (test, code, value_mode, reg1, reg2); > >>>> > >>>> If that looks ok, I'll resend the series. > >>> > >>> that looks OK to me - please leave Richard S. time to comment. Also while > >>> I'd like to see > >>> > >>> rtx reg1 = alloca_raw_REG (cmp_op_mode, LAST_VIRTUAL_REGISTER + 1); > >>> > >>> I don't really see a way to write that portably (or at all), do you all agree? > >>> GCC doesn't seem to convert alloca() calls to __builtin_stack_save/restore > >>> nor place CLOBBERs to end their lifetime. But is it guaranteed that the > >>> alloca result is valid until frame termination? > >> > >> Hmm, the alloca man page says: > >> > >> The alloca() function allocates size bytes of space in the stack > >> frame of the caller. This temporary space is automatically freed > >> when the function that called alloca() returns to its caller. > >> ... > >> The space allocated by alloca() is not automatically deallocated if > >> the pointer that refers to it simply goes out of scope. > >> > >> A quick experiment with gcc and clang confirms this. I think this means > >> I can make alloca_raw_REG macro return the allocated pointer using the > >> return-from-block GNU extension. > > > > What do you think about the following approach? > > > > extern rtx rtx_init (rtx, RTX_CODE); > > #define rtx_alloca(code) \ > > rtx_init ((rtx) alloca (RTX_CODE_SIZE ((code))), (code)) > > > > ... > > > > rtx > > rtx_init (rtx rt, RTX_CODE code) > > { > > /* We want to clear everything up to the FLD array. Normally, this > > is one int, but we don't want to assume that and it isn't very > > portable anyway; this is. */ > > memset (rt, 0, RTX_HDR_SIZE); > > Might as well remove this comment while you're there. RTX_HDR_SIZE > has long ceased to be sizeof (int) for any host. > > > PUT_CODE (rt, code); > > return rt; > > } > > > > with similar changes to alloca_raw_REG and gengenrtl. This way we don't > > even need a GNU extension. > > Sounds good to me, but rtx_init should probably be an inline function. Sounds good to me, too. Richard. > Thanks, > Richard
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 597dc01328b..d2207da5657 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2541,7 +2541,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/function.c $(srcdir)/except.c \ $(srcdir)/ggc-tests.c \ $(srcdir)/gcse.c $(srcdir)/godump.c \ - $(srcdir)/lists.c $(srcdir)/optabs-libfuncs.c \ + $(srcdir)/lists.c $(srcdir)/optabs.c $(srcdir)/optabs-libfuncs.c \ $(srcdir)/profile.c $(srcdir)/mcf.c \ $(srcdir)/reg-stack.c $(srcdir)/cfgrtl.c \ $(srcdir)/stor-layout.c \ diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index 8157798cc71..e68bb39c021 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -23,7 +23,10 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "target.h" #include "insn-codes.h" +#include "rtl.h" #include "tree.h" +#include "memmodel.h" +#include "optabs.h" #include "optabs-tree.h" #include "stor-layout.h" @@ -347,8 +350,12 @@ expand_vec_cond_expr_p (tree value_type, tree cmp_op_type, enum tree_code code) || maybe_ne (GET_MODE_NUNITS (value_mode), GET_MODE_NUNITS (cmp_op_mode))) return false; - if (get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type), - TYPE_UNSIGNED (cmp_op_type)) == CODE_FOR_nothing + bool unsigned_p = TYPE_UNSIGNED (cmp_op_type); + if (((get_vcond_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type), + unsigned_p) == CODE_FOR_nothing) + || !can_vector_compare_p (get_rtx_code (code, unsigned_p), + TYPE_MODE (value_type), + TYPE_MODE (cmp_op_type), cvcp_vcond)) && ((code != EQ_EXPR && code != NE_EXPR) || get_vcond_eq_icode (TYPE_MODE (value_type), TYPE_MODE (cmp_op_type)) == CODE_FOR_nothing)) diff --git a/gcc/optabs.c b/gcc/optabs.c index 9e54dda6e7f..07b4d824822 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "recog.h" #include "diagnostic-core.h" #include "rtx-vector-builder.h" +#include "hash-table.h" /* Include insn-config.h before expr.h so that HAVE_conditional_move is properly defined. */ @@ -3819,6 +3820,82 @@ can_compare_p (enum rtx_code code, machine_mode mode, return 0; } +/* can_vector_compare_p presents fake rtx binary operations to the the back-end + in order to determine its capabilities. In order to avoid creating fake + operations on each call, values from previous calls are cached in a global + cached_binops hash_table. It contains rtxes, which can be looked up using + binop_keys. */ + +struct binop_key { + enum rtx_code code; /* Operation code. */ + machine_mode value_mode; /* Result mode. */ + machine_mode cmp_op_mode; /* Operand mode. */ +}; + +struct binop_hasher : pointer_hash_mark<rtx>, ggc_cache_remove<rtx> { + typedef rtx value_type; + typedef binop_key compare_type; + + static hashval_t + hash (enum rtx_code code, machine_mode value_mode, machine_mode cmp_op_mode) + { + inchash::hash hstate (0); + hstate.add_int (code); + hstate.add_int (value_mode); + hstate.add_int (cmp_op_mode); + return hstate.end (); + } + + static hashval_t + hash (const rtx &ref) + { + return hash (GET_CODE (ref), GET_MODE (ref), GET_MODE (XEXP (ref, 0))); + } + + static bool + equal (const rtx &ref1, const binop_key &ref2) + { + return (GET_CODE (ref1) == ref2.code) + && (GET_MODE (ref1) == ref2.value_mode) + && (GET_MODE (XEXP (ref1, 0)) == ref2.cmp_op_mode); + } +}; + +static GTY ((cache)) hash_table<binop_hasher> *cached_binops; + +static rtx +get_cached_binop (enum rtx_code code, machine_mode value_mode, + machine_mode cmp_op_mode) +{ + if (!cached_binops) + cached_binops = hash_table<binop_hasher>::create_ggc (1024); + binop_key key = { code, value_mode, cmp_op_mode }; + hashval_t hash = binop_hasher::hash (code, value_mode, cmp_op_mode); + rtx *slot = cached_binops->find_slot_with_hash (key, hash, INSERT); + if (!*slot) + *slot = gen_rtx_fmt_ee (code, value_mode, gen_reg_rtx (cmp_op_mode), + gen_reg_rtx (cmp_op_mode)); + return *slot; +} + +bool +can_vector_compare_p (enum rtx_code code, machine_mode value_mode, + machine_mode cmp_op_mode, + enum can_vector_compare_purpose purpose) +{ + enum insn_code icode; + bool unsigned_p = (code == LTU || code == LEU || code == GTU || code == GEU); + rtx test = get_cached_binop(code, value_mode, cmp_op_mode); + + if (purpose == cvcp_vcond + && (icode = get_vcond_icode (value_mode, cmp_op_mode, unsigned_p)) + != CODE_FOR_nothing + && insn_operand_matches (icode, 3, test)) + return true; + + return false; +} + /* This function is called when we are going to emit a compare instruction that compares the values found in X and Y, using the rtl operator COMPARISON. @@ -7481,3 +7558,5 @@ expand_jump_insn (enum insn_code icode, unsigned int nops, if (!maybe_expand_jump_insn (icode, nops, ops)) gcc_unreachable (); } + +#include "gt-optabs.h" diff --git a/gcc/optabs.h b/gcc/optabs.h index 897bb5d4443..2b2338a67af 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -242,6 +242,21 @@ enum can_compare_purpose (without splitting it into pieces). */ extern int can_compare_p (enum rtx_code, machine_mode, enum can_compare_purpose); + +/* The various uses that a vector comparison can have; used by + can_vector_compare_p. So far only vcond is defined, vec_cmp is a possible + future extension. */ +enum can_vector_compare_purpose +{ + cvcp_vcond +}; + +/* Return whether back-end can emit a vector comparison insn(s) using a given + CODE, with operands with CMP_OP_MODE, producing a result with VALUE_MODE, + in order to achieve a PURPOSE. */ +extern bool can_vector_compare_p (enum rtx_code, machine_mode, machine_mode, + enum can_vector_compare_purpose); + extern rtx prepare_operand (enum insn_code, rtx, int, machine_mode, machine_mode, int); /* Emit a pair of rtl insns to compare two rtx's and to jump