From patchwork Sat Sep 14 06:13:22 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: DJ Delorie X-Patchwork-Id: 274897 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id EFDBA2C0149 for ; Sat, 14 Sep 2013 16:13:40 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :message-id:from:to:subject; q=dns; s=default; b=UVj8gjFlS0RR+sh im8wAZJ9V2pZULZphN7/h99Gh62AqWcvOpvvsMDGGYs1OeT1t23FUHOogDKZgesK vFmX2vXUSbs/t9SByaShC4t16/Bt3iyX1dYVqAlfGLZNv5v3iIplbtQ2cJwGNzRt tVtHrnFVrxYkJnf4KhRikXvVqRmI= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :message-id:from:to:subject; s=default; bh=xwmjhBTy4YmrIcSO9FTLC i9h1N8=; b=ynZWr7EzEps6POUvEc9yU6t7FccZ2PUqHvPG/9j9iPnuAd3qYnZYr 7jl7AhyVb21E/NlBEf6ATQ/tIV4arZf+fuejro1AHPyhYIXW1IBKJIojhniDB2lo QsCkdeyih/y+Zg9IJxckRn/zW+xlNbWpvGIrbFizUlZMHy3Ha9kj70= Received: (qmail 25444 invoked by alias); 14 Sep 2013 06:13:32 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 25431 invoked by uid 89); 14 Sep 2013 06:13:31 -0000 Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Sat, 14 Sep 2013 06:13:31 +0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-3.5 required=5.0 tests=AWL, BAYES_00, RP_MATCHES_RCVD 
autolearn=ham version=3.3.2 X-HELO: mx1.redhat.com Received: from int-mx11.intmail.prod.int.phx2.redhat.com (int-mx11.intmail.prod.int.phx2.redhat.com [10.5.11.24]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id r8E6DPcR028268 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Sat, 14 Sep 2013 02:13:25 -0400 Received: from greed.delorie.com (ovpn-113-20.phx2.redhat.com [10.3.113.20]) by int-mx11.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id r8E6DNHA001144 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO) for ; Sat, 14 Sep 2013 02:13:24 -0400 Received: from greed.delorie.com (greed.delorie.com [127.0.0.1]) by greed.delorie.com (8.14.4/8.14.4) with ESMTP id r8E6DMua031956 for ; Sat, 14 Sep 2013 02:13:22 -0400 Received: (from dj@localhost) by greed.delorie.com (8.14.4/8.14.4/Submit) id r8E6DMEa031955; Sat, 14 Sep 2013 02:13:22 -0400 Date: Sat, 14 Sep 2013 02:13:22 -0400 Message-Id: <201309140613.r8E6DMEa031955@greed.delorie.com> From: DJ Delorie To: gcc-patches@gcc.gnu.org Subject: [rl78] libgcc optimizations X-IsSubscribed: yes Various optimizations. Committed. 2013-09-14 DJ Delorie Nick Clifton * config/rl78/mulsi3.S: Remove a few unneeded moves and branches. * config/rl78/vregs.h: New. * config/rl78/signbit.S: New file. Implements signbit function. * config/rl78/divmodsi.S: New. * config/rl78/divmodhi.S: New. * config/rl78/divmodqi.S: New. * config/rl78/t-rl78: Build them here... * config/rl78/lib2div.c: ...but not here. Index: config/rl78/divmodsi.S =================================================================== --- config/rl78/divmodsi.S (revision 0) +++ config/rl78/divmodsi.S (revision 0) @@ -0,0 +1,521 @@ +/* SImode div/mod functions for the GCC support library for the Renesas RL78 processors. + Copyright (C) 2012,2013 Free Software Foundation, Inc. + Contributed by Red Hat. + + This file is part of GCC. 
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __RL78_G10__
+
+#include "vregs.h"
+
+	;; make_generic WHICH, NEED_RESULT
+	;;
+	;; Expands to the shared unsigned 32-bit shift-and-subtract divider.
+	;;   NEED_RESULT = 1 emits generic_div: quotient is built in r8..r11.
+	;;   NEED_RESULT = 0 emits generic_mod: the numerator lives in r8..r11
+	;;                   and is reduced in place to the remainder.
+	;; WHICH is a label suffix that keeps the two expansions distinct.
+	;; Operands are read at [sp+8] (numerator) and [sp+12] (denominator),
+	;; i.e. the offsets seen after the CALL into generic_div/generic_mod.
+	;; r16, r17 and r20..r23 (bank 2 AX, DE, HL) are saved and restored;
+	;; BC holds the high word of the denominator and is clobbered.
+	.macro make_generic which,need_result
+
+	.if \need_result
+	quot = r8
+	num = r12
+	den = r16
+	bit = r20
+	.else
+	num = r8
+	quot = r12
+	den = r16
+	bit = r20
+	.endif
+
+	quotH = quot+2
+	quotL = quot
+	quotB0 = quot
+	quotB1 = quot+1
+	quotB2 = quot+2
+	quotB3 = quot+3
+
+	numH = num+2
+	numL = num
+	numB0 = num
+	numB1 = num+1
+	numB2 = num+2
+	numB3 = num+3
+
+#define denH bc
+	denL = den
+	denB0 = den
+	denB1 = den+1
+#define denB2 c
+#define denB3 b
+
+	bitH = bit+2
+	bitL = bit
+	bitB0 = bit
+	bitB1 = bit+1
+	bitB2 = bit+2
+	bitB3 = bit+3
+
+	;; Early exit taken before anything is pushed: numerator < denominator,
+	;; so the quotient is 0 and the remainder is the numerator itself.
+num_lt_den\which:
+	.if \need_result
+	movw	r8, #0
+	movw	r10, #0
+	.else
+	movw	ax, [sp+8]
+	movw	r8, ax
+	movw	ax, [sp+10]
+	movw	r10, ax
+	.endif
+	ret
+
+	;; Shortcut: shift den (and bit) left by a whole word in one step.
+shift_den_bit16\which:
+	movw	ax, denL
+	movw	denH, ax
+	movw	denL, #0
+	.if \need_result
+	movw	ax, bitL
+	movw	bitH, ax
+	movw	bitL, #0
+	.else
+	mov	a, bit
+	add	a, #16
+	mov	bit, a
+	.endif
+	br	$shift_den_bit\which
+
+	;; These routines leave DE alone - the signed functions use DE
+	;; to store sign information that must remain intact
+
+	.if \need_result
+
+generic_div:
+
+	.else
+
+generic_mod:
+
+	.endif
+
+	;; (quot,rem) = 8[sp] /% 12[sp]
+
+	movw	hl, sp
+	movw	ax, [hl+14] ; denH
+	cmpw	ax, [hl+10] ; numH
+	movw	ax, [hl+12] ; denL
+	sknz
+	cmpw	ax, [hl+8] ; numL
+	bh	$num_lt_den\which
+
+	;; Save the preserved virtual registers we use (bank 2 views).
+	;; Every exit below this point must go through
+	;; main_loop_done_himode so that they are popped again.
+	sel	rb2
+	push	ax		; denL
+;	push	bc		; denH
+	push	de		; bitL
+	push	hl		; bitH - stored in BC
+	sel	rb0
+
+	;; (quot,rem) = 16[sp] /% 20[sp]
+
+	;; copy numerator
+	movw	ax, [hl+8]
+	movw	numL, ax
+	movw	ax, [hl+10]
+	movw	numH, ax
+
+	;; copy denominator
+	movw	ax, [hl+12]
+	movw	denL, ax
+	movw	ax, [hl+14]
+	movw	denH, ax
+
+	;; OR all four denominator bytes together to test for zero
+	movw	ax, denL
+	or	a, denB2
+	or	a, denB3	; not x
+	cmpw	ax, #0
+	bnz	$den_not_zero\which
+	;; Division by zero: return 0 in r8..r11 for both variants.  The
+	;; saved registers are already on the stack, so leave through the
+	;; common epilogue instead of a bare RET.
+	.if \need_result
+	movw	quotL, #0
+	movw	quotH, #0
+	.else
+	movw	numL, #0
+	movw	numH, #0
+	.endif
+	br	$!main_loop_done_himode\which
+
+den_not_zero\which:
+	.if \need_result
+	;; zero out quot
+	movw	quotL, #0
+	movw	quotH, #0
+	.endif
+
+	;; initialize bit to 1
+	movw	bitL, #1
+	movw	bitH, #0
+
+; while (den < num && !(den & (1L << BITS_MINUS_1)))
+
+	.if 1
+	;; see if we can short-circuit a bunch of shifts
+	movw	ax, denH
+	cmpw	ax, #0
+	bnz	$shift_den_bit\which
+	movw	ax, denL
+	cmpw	ax, numH
+	bnh	$shift_den_bit16\which
+	.endif
+
+shift_den_bit\which:
+	movw	ax, denH
+	mov1	cy,a.7
+	bc	$enter_main_loop\which
+	cmpw	ax, numH
+	movw	ax, denL		; we re-use this below
+	sknz
+	cmpw	ax, numL
+	bh	$enter_main_loop\which
+
+	;; den <<= 1
+;	movw	ax, denL		; already has it from the cmpw above
+	shlw	ax, 1
+	movw	denL, ax
+;	movw	ax, denH
+	rolwc	denH, 1
+;	movw	denH, ax
+
+	;; bit <<= 1
+	.if \need_result
+	movw	ax, bitL
+	shlw	ax, 1
+	movw	bitL, ax
+	movw	ax, bitH
+	rolwc	ax, 1
+	movw	bitH, ax
+	.else
+	;; if we don't need to compute the quotient, we don't need an
+	;; actual bit *mask*, we just need to keep track of which bit
+	inc	bitB0
+	.endif
+
+	br	$shift_den_bit\which
+
+	;; while (bit)
+main_loop\which:
+
+	;; if (num >= den) (cmp den > num)
+	movw	ax, numH
+	cmpw	ax, denH
+	movw	ax, numL
+	sknz
+	cmpw	ax, denL
+	skz
+	bnh	$next_loop\which
+
+	;; num -= den
+;	movw	ax, numL		; already has it from the cmpw above
+	subw	ax, denL
+	movw	numL, ax
+	movw	ax, numH
+	sknc
+	decw	ax
+	subw	ax, denH
+	movw	numH, ax
+
+	.if \need_result
+	;; res |= bit
+	mov	a, quotB0
+	or	a, bitB0
+	mov	quotB0, a
+	mov	a, quotB1
+	or	a, bitB1
+	mov	quotB1, a
+	mov	a, quotB2
+	or	a, bitB2
+	mov	quotB2, a
+	mov	a, quotB3
+	or	a, bitB3
+	mov	quotB3, a
+	.endif
+
+next_loop\which:
+
+	;; den >>= 1
+	movw	ax, denH
+	shrw	ax, 1
+	movw	denH, ax
+	mov	a, denB1
+	rorc	a, 1
+	mov	denB1, a
+	mov	a, denB0
+	rorc	a, 1
+	mov	denB0, a
+
+	;; bit >>= 1
+	.if \need_result
+	movw	ax, bitH
+	shrw	ax, 1
+	movw	bitH, ax
+	mov	a, bitB1
+	rorc	a, 1
+	mov	bitB1, a
+	mov	a, bitB0
+	rorc	a, 1
+	mov	bitB0, a
+	.else
+	dec	bitB0
+	.endif
+
+enter_main_loop\which:
+	.if \need_result
+	movw	ax, bitH
+	cmpw	ax, #0
+	bnz	$main_loop\which
+	.else
+	cmp	bitB0, #15
+	bh	$main_loop\which
+	.endif
+	;; bit is HImode now; check others
+	movw	ax, numH	; numerator
+	cmpw	ax, #0
+	bnz	$bit_high_set\which
+	movw	ax, denH	; denominator
+	cmpw	ax, #0
+	bz	$switch_to_himode\which
+bit_high_set\which:
+	.if \need_result
+	movw	ax, bitL
+	cmpw	ax, #0
+	.else
+	cmp0	bitB0
+	.endif
+	bnz	$main_loop\which
+
+switch_to_himode\which:
+	.if \need_result
+	movw	ax, bitL
+	cmpw	ax, #0
+	.else
+	cmp0	bitB0
+	.endif
+	bz	$main_loop_done_himode\which
+
+	;; From here on in, r22, r14, and r18 are all zero
+	;; while (bit)
+main_loop_himode\which:
+
+	;; if (num >= den) (cmp den > num)
+	movw	ax, denL
+	cmpw	ax, numL
+	bh	$next_loop_himode\which
+
+	;; num -= den
+	movw	ax, numL
+	subw	ax, denL
+	movw	numL, ax
+	movw	ax, numH
+	sknc
+	decw	ax
+	subw	ax, denH
+	movw	numH, ax
+
+	.if \need_result
+	;; res |= bit
+	mov	a, quotB0
+	or	a, bitB0
+	mov	quotB0, a
+	mov	a, quotB1
+	or	a, bitB1
+	mov	quotB1, a
+	.endif
+
+next_loop_himode\which:
+
+	;; den >>= 1
+	movw	ax, denL
+	shrw	ax, 1
+	movw	denL, ax
+
+	.if \need_result
+	;; bit >>= 1
+	movw	ax, bitL
+	shrw	ax, 1
+	movw	bitL, ax
+	.else
+	dec	bitB0
+	.endif
+
+	.if \need_result
+	movw	ax, bitL
+	cmpw	ax, #0
+	.else
+	cmp0	bitB0
+	.endif
+	bnz	$main_loop_himode\which
+
+	;; Common epilogue: restore the registers pushed on entry.
+main_loop_done_himode\which:
+	sel	rb2
+	pop	hl		; bitH - stored in BC
+	pop	de		; bitL
+;	pop	bc		; denH
+	pop	ax		; denL
+	sel	rb0
+
+	ret
+	.endm
+
+	make_generic _d 1
+	make_generic _m 0
+
+;----------------------------------------------------------------------
+
+	.global	___udivsi3
+	.type	___udivsi3,@function
+___udivsi3:
+	;; r8 = 4[sp] / 8[sp]
+	call	$!generic_div
+	ret
+	.size	___udivsi3, . - ___udivsi3
+
+
+	.global	___umodsi3
+	.type	___umodsi3,@function
+___umodsi3:
+	;; r8 = 4[sp] % 8[sp]
+	call	$!generic_mod
+	ret
+	.size	___umodsi3, . - ___umodsi3
+
+;----------------------------------------------------------------------
+
+	;; neg_ax: negate, in place, the 32-bit value whose address is in AX.
+	;; Clobbers AX and HL.
+	.macro neg_ax
+	movw	hl, ax
+	movw	ax, #0
+	subw	ax, [hl]
+	movw	[hl], ax
+	movw	ax, #0
+	sknc
+	decw	ax
+	subw	ax, [hl+2]
+	movw	[hl+2], ax
+	.endm
+
+	;; Signed divide.  D = 1 if the numerator was negated, E = 1 if the
+	;; denominator was negated; both stack arguments are put back to
+	;; their original values before returning.
+	.global	___divsi3
+	.type	___divsi3,@function
+___divsi3:
+	;; r8 = 4[sp] / 8[sp]
+	movw	de, #0
+	mov	a, [sp+7]
+	mov1	cy, a.7
+	bc	$div_signed_num
+	mov	a, [sp+11]
+	mov1	cy, a.7
+	bc	$div_signed_den
+	call	$!generic_div
+	ret
+
+div_signed_num:
+	;; neg [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	d, #1
+	mov	a, [sp+11]
+	mov1	cy, a.7
+	bnc	$div_unsigned_den
+div_signed_den:
+	;; neg [sp+8]
+	movw	ax, sp
+	addw	ax, #8
+	neg_ax
+	mov	e, #1
+div_unsigned_den:
+	call	$!generic_div
+
+	mov	a, d
+	cmp0	a
+	bz	$div_skip_restore_num
+	;; We have to restore the numerator [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	a, d
+div_skip_restore_num:
+	;; quotient is negative iff exactly one operand was negative
+	xor	a, e
+	bz	$div_no_neg
+	movw	ax, #r8
+	neg_ax
+div_no_neg:
+	mov	a, e
+	cmp0	a
+	bz	$div_skip_restore_den
+	;; We have to restore the denominator [sp+8]
+	movw	ax, sp
+	addw	ax, #8
+	neg_ax
+div_skip_restore_den:
+	ret
+	.size	___divsi3, . 
- ___divsi3
+
+
+	;; Signed remainder.  The result takes the sign of the numerator.
+	;; D = 1 if the numerator was negated, E = 1 if the denominator was
+	;; negated; both stack arguments are restored before returning.
+	.global	___modsi3
+	.type	___modsi3,@function
+___modsi3:
+	;; r8 = 4[sp] % 8[sp]
+	movw	de, #0
+	mov	a, [sp+7]		; top byte of numerator
+	mov1	cy, a.7
+	bc	$mod_signed_num
+	mov	a, [sp+11]		; top byte of denominator
+	mov1	cy, a.7
+	bc	$mod_signed_den
+	call	$!generic_mod
+	ret
+
+mod_signed_num:
+	;; numerator = -numerator, remember that in D
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	d, #1
+	mov	a, [sp+11]
+	mov1	cy, a.7
+	bnc	$mod_unsigned_den
+mod_signed_den:
+	;; denominator = -denominator, remember that in E
+	movw	ax, sp
+	addw	ax, #8
+	neg_ax
+	mov	e, #1
+mod_unsigned_den:
+	call	$!generic_mod
+
+	mov	a, d
+	cmp0	a
+	bz	$mod_no_neg
+	;; numerator was negative: negate the remainder in r8..r11 ...
+	movw	ax, #r8
+	neg_ax
+	;; ... and put the caller's numerator [sp+4] back.
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+mod_no_neg:
+	mov	a, e
+	cmp0	a
+	bz	$mod_skip_restore_den
+	;; put the caller's denominator [sp+8] back
+	movw	ax, sp
+	addw	ax, #8
+	neg_ax
+mod_skip_restore_den:
+	ret
+	.size	___modsi3, . - ___modsi3
+
+#endif
Index: config/rl78/t-rl78
===================================================================
--- config/rl78/t-rl78	(revision 202587)
+++ config/rl78/t-rl78	(working copy)
@@ -22,7 +22,11 @@ LIB2ADD = \
 	$(srcdir)/config/rl78/trampoline.S \
 	$(srcdir)/config/rl78/lib2div.c \
 	$(srcdir)/config/rl78/lib2mul.c \
 	$(srcdir)/config/rl78/lib2shift.c \
 	$(srcdir)/config/rl78/lshrsi3.S \
 	$(srcdir)/config/rl78/mulsi3.S \
+	$(srcdir)/config/rl78/divmodsi.S \
+	$(srcdir)/config/rl78/divmodhi.S \
+	$(srcdir)/config/rl78/divmodqi.S \
+	$(srcdir)/config/rl78/signbit.S \
 	$(srcdir)/config/rl78/cmpsi2.S
Index: config/rl78/signbit.S
===================================================================
--- config/rl78/signbit.S	(revision 0)
+++ config/rl78/signbit.S	(revision 0)
@@ -0,0 +1,67 @@
+;   Copyright (C) 2012,2013 Free Software Foundation, Inc.
+;   Contributed by Red Hat.
+;
+; This file is free software; you can redistribute it and/or modify it
+; under the terms of the GNU General Public License as published by the
+; Free Software Foundation; either version 3, or (at your option) any
+; later version.
+;
+; This file is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; General Public License for more details.
+;
+; Under Section 7 of GPL version 3, you are granted additional
+; permissions described in the GCC Runtime Library Exception, version
+; 3.1, as published by the Free Software Foundation.
+;
+; You should have received a copy of the GNU General Public License and
+; a copy of the GCC Runtime Library Exception along with this program;
+; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+; <http://www.gnu.org/licenses/>.
+
+#include "vregs.h"
+
+;; int signbitf (float X)
+;; int signbit (double X)
+;; int signbitl (long double X)
+;;
+;;     `signbit' returns a nonzero value if the value of X has its sign
+;;     bit set.
+;;
+;;     This is not the same as `x < 0.0', because IEEE 754 floating point
+;;     allows zero to be signed.  The comparison `-0.0 < 0.0' is false,
+;;     but `signbit (-0.0)' will return a nonzero value.
+
+;----------------------------------------------------------------------
+
+	.text
+
+	;; _signbit and _signbitf share one body: both test bit 7 of the
+	;; byte at [sp+7], the top byte of a 4-byte argument at [sp+4].
+	.global	_signbit
+	.type	_signbit,@function
+_signbit:
+	.global	_signbitf
+	.type	_signbitf,@function
+_signbitf:
+	;; X is at [sp+4]
+	;; result is in R8..R9
+
+	movw	r8, #0			; assume sign bit clear
+	mov	a, [sp+7]
+	mov1	cy, a.7			; CY = sign bit
+	sknc				; skip the store below when CY = 0
+	movw	r8, #1
+	ret
+	.size	_signbit, . - _signbit
+	.size	_signbitf, . - _signbitf
+
+	;; _signbitl tests bit 7 of the byte at [sp+11], the top byte of
+	;; an 8-byte argument at [sp+4].
+	.global	_signbitl
+	.type	_signbitl,@function
+_signbitl:
+	;; X is at [sp+4]
+	;; result is in R8..R9
+
+	movw	r8, #0			; assume sign bit clear
+	mov	a, [sp+11]
+	mov1	cy, a.7			; CY = sign bit
+	sknc				; skip the store below when CY = 0
+	movw	r8, #1
+	ret
+	.size	_signbitl, . - _signbitl
Index: config/rl78/divmodhi.S
===================================================================
--- config/rl78/divmodhi.S	(revision 0)
+++ config/rl78/divmodhi.S	(revision 0)
@@ -0,0 +1,337 @@
+/* HImode div/mod functions for the GCC support library for the Renesas RL78 processors.
+   Copyright (C) 2012,2013 Free Software Foundation, Inc.
+   Contributed by Red Hat.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __RL78_G10__
+
+#include "vregs.h"
+
+	;; make_generic WHICH, NEED_RESULT
+	;;
+	;; Expands to the shared unsigned 16-bit shift-and-subtract divider.
+	;;   NEED_RESULT = 1 emits generic_div: quotient is built in r8.
+	;;   NEED_RESULT = 0 emits generic_mod: the numerator lives in r8
+	;;                   and is reduced in place to the remainder.
+	;; WHICH is a label suffix that keeps the two expansions distinct.
+	;; Operands are read at [sp+8] (numerator) and [sp+10] (denominator),
+	;; i.e. the offsets seen after the CALL into generic_div/generic_mod.
+	;; The bit mask / bit counter is kept in BC, which is clobbered.
+	.macro make_generic which,need_result
+
+	.if \need_result
+	quot = r8
+	num = r10
+	den = r12
+	bit = r14
+	.else
+	num = r8
+	quot = r10
+	den = r12
+	bit = r14
+	.endif
+
+	quotB0 = quot
+	quotB1 = quot+1
+
+	numB0 = num
+	numB1 = num+1
+
+	denB0 = den
+	denB1 = den+1
+
+	bitB0 = bit
+	bitB1 = bit+1
+
+#if 1
+#define bit	bc
+#define bitB0	c
+#define bitB1	b
+#endif
+
+	;; Early exit: numerator < denominator, so the quotient is 0 and
+	;; the remainder is the numerator itself.
+num_lt_den\which:
+	.if \need_result
+	movw	r8, #0
+	.else
+	movw	ax, [sp+8]
+	movw	r8, ax
+	.endif
+	ret
+
+	;; These routines leave DE alone - the signed functions use DE
+	;; to store sign information that must remain intact
+
+	.if \need_result
+
+generic_div:
+
+	.else
+
+generic_mod:
+
+	.endif
+
+	;; (quot,rem) = 8[sp] /% 10[sp]
+
+	movw	hl, sp
+	movw	ax, [hl+10] ; denH
+	cmpw	ax, [hl+8] ; numH
+	bh	$num_lt_den\which
+
+	;; (quot,rem) = 16[sp] /% 20[sp]
+
+	;; copy numerator
+	movw	ax, [hl+8]
+	movw	num, ax
+
+	;; copy denominator
+	movw	ax, [hl+10]
+	movw	den, ax
+
+	movw	ax, den
+	cmpw	ax, #0
+	bnz	$den_not_zero\which
+	;; Division by zero: return 0 in r8 for both variants.  r8 is
+	;; quot for generic_div and num for generic_mod.
+	.if \need_result
+	movw	quot, #0
+	.else
+	movw	num, #0
+	.endif
+	ret
+
+den_not_zero\which:
+	.if \need_result
+	;; zero out quot
+	movw	quot, #0
+	.endif
+
+	;; initialize bit to 1
+	movw	bit, #1
+
+; while (den < num && !(den & (1L << BITS_MINUS_1)))
+
+shift_den_bit\which:
+	movw	ax, den
+	mov1	cy,a.7
+	bc	$enter_main_loop\which
+	cmpw	ax, num
+	bh	$enter_main_loop\which
+
+	;; den <<= 1
+;	movw	ax, den		; already has it from the cmpw above
+	shlw	ax, 1
+	movw	den, ax
+
+	;; bit <<= 1
+	.if \need_result
+#ifdef bit
+	shlw	bit, 1
+#else
+	movw	ax, bit
+	shlw	ax, 1
+	movw	bit, ax
+#endif
+	.else
+	;; if we don't need to compute the quotient, we don't need an
+	;; actual bit *mask*, we just need to keep track of which bit
+	inc	bitB0
+	.endif
+
+	br	$shift_den_bit\which
+
+main_loop\which:
+
+	;; if (num >= den) (cmp den > num)
+	movw	ax, den
+	cmpw	ax, num
+	bh	$next_loop\which
+
+	;; num -= den
+	movw	ax, num
+	subw	ax, den
+	movw	num, ax
+
+	.if \need_result
+	;; res |= bit
+	mov	a, quotB0
+	or	a, bitB0
+	mov	quotB0, a
+	mov	a, quotB1
+	or	a, bitB1
+	mov	quotB1, a
+	.endif
+
+next_loop\which:
+
+	;; den >>= 1
+	movw	ax, den
+	shrw	ax, 1
+	movw	den, ax
+
+	.if \need_result
+	;; bit >>= 1
+	movw	ax, bit
+	shrw	ax, 1
+	movw	bit, ax
+	.else
+	dec	bitB0
+	.endif
+
+enter_main_loop\which:
+	.if \need_result
+	movw	ax, bit
+	cmpw	ax, #0
+	.else
+	cmp0	bitB0
+	.endif
+	bnz	$main_loop\which
+
+main_loop_done\which:
+	ret
+	.endm
+
+	make_generic _d 1
+	make_generic _m 0
+
+;----------------------------------------------------------------------
+
+	.global	___udivhi3
+	.type	___udivhi3,@function
+___udivhi3:
+	;; r8 = 4[sp] / 6[sp]
+	call	$!generic_div
+	ret
+	.size	___udivhi3, . - ___udivhi3
+
+
+	.global	___umodhi3
+	.type	___umodhi3,@function
+___umodhi3:
+	;; r8 = 4[sp] % 6[sp]
+	call	$!generic_mod
+	ret
+	.size	___umodhi3, . - ___umodhi3
+
+;----------------------------------------------------------------------
+
+	;; neg_ax: negate, in place, the 16-bit value whose address is in AX.
+	;; Clobbers AX and HL.
+	.macro neg_ax
+	movw	hl, ax
+	movw	ax, #0
+	subw	ax, [hl]
+	movw	[hl], ax
+	.endm
+
+	;; Signed divide.  D = 1 if the numerator was negated, E = 1 if the
+	;; denominator was negated; both stack arguments are put back to
+	;; their original values before returning.
+	.global	___divhi3
+	.type	___divhi3,@function
+___divhi3:
+	;; r8 = 4[sp] / 6[sp]
+	movw	de, #0
+	mov	a, [sp+5]
+	mov1	cy, a.7
+	bc	$div_signed_num
+	mov	a, [sp+7]
+	mov1	cy, a.7
+	bc	$div_signed_den
+	call	$!generic_div
+	ret
+
+div_signed_num:
+	;; neg [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	d, #1
+	mov	a, [sp+7]
+	mov1	cy, a.7
+	bnc	$div_unsigned_den
+div_signed_den:
+	;; neg [sp+6]
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+	mov	e, #1
+div_unsigned_den:
+	call	$!generic_div
+
+	mov	a, d
+	cmp0	a
+	bz	$div_skip_restore_num
+	;; We have to restore the numerator [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	a, d
+div_skip_restore_num:
+	;; quotient is negative iff exactly one operand was negative
+	xor	a, e
+	bz	$div_no_neg
+	movw	ax, #r8
+	neg_ax
+div_no_neg:
+	mov	a, e
+	cmp0	a
+	bz	$div_skip_restore_den
+	;; We have to restore the denominator [sp+6]
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+div_skip_restore_den:
+	ret
+	.size	___divhi3, . - ___divhi3
+
+
+	;; Signed remainder.  The result takes the sign of the numerator.
+	;; D and E are used exactly as in ___divhi3.
+	.global	___modhi3
+	.type	___modhi3,@function
+___modhi3:
+	;; r8 = 4[sp] % 6[sp]
+	movw	de, #0
+	mov	a, [sp+5]
+	mov1	cy, a.7
+	bc	$mod_signed_num
+	mov	a, [sp+7]
+	mov1	cy, a.7
+	bc	$mod_signed_den
+	call	$!generic_mod
+	ret
+
+mod_signed_num:
+	;; neg [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	d, #1
+	mov	a, [sp+7]
+	mov1	cy, a.7
+	bnc	$mod_unsigned_den
+mod_signed_den:
+	;; neg [sp+6]
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+	;; Record the negation so the denominator is restored below;
+	;; without this the caller's argument slot stays negated.
+	mov	e, #1
+mod_unsigned_den:
+	call	$!generic_mod
+
+	mov	a, d
+	cmp0	a
+	bz	$mod_no_neg
+	movw	ax, #r8
+	neg_ax
+	;; Also restore numerator
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+mod_no_neg:
+	mov	a, e
+	cmp0	a
+	bz	$mod_skip_restore_den
+	;; We have to restore the denominator [sp+6]
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+mod_skip_restore_den:
+	ret
+	.size	___modhi3, . 
- ___modhi3 + +#endif Index: config/rl78/lib2div.c =================================================================== --- config/rl78/lib2div.c (revision 202587) +++ config/rl78/lib2div.c (working copy) @@ -31,12 +31,14 @@ typedef int sint08_type __at typedef unsigned int uint08_type __attribute__ ((mode (QI))); typedef int word_type __attribute__ ((mode (__word__))); #define C3B(a,b,c) a##b##c #define C3(a,b,c) C3B(a,b,c) +#if 0 + #define UINT_TYPE uint32_type #define SINT_TYPE sint32_type #define BITS_MINUS_1 31 #define NAME_MODE si #include "rl78-divmod.h" @@ -62,12 +64,14 @@ typedef int word_type __at #define SINT_TYPE sint08_type #define BITS_MINUS_1 7 #define NAME_MODE qi #include "rl78-divmod.h" +#endif + /* See the comment by the definition of LIBGCC2_UNITS_PER_WORD in m32c.h for why we are creating extra versions of some of the functions defined in libgcc2.c. */ #define LIBGCC2_UNITS_PER_WORD 2 Index: config/rl78/vregs.h =================================================================== --- config/rl78/vregs.h (revision 0) +++ config/rl78/vregs.h (revision 0) @@ -0,0 +1,32 @@ + +; real +; GAS defines r0..r7 as aliases for real registers; we want the saddr +; forms here. 
+r_0 = 0xffef8 +r_1 = 0xffef9 +r_2 = 0xffefa +r_3 = 0xffefb +r_4 = 0xffefc +r_5 = 0xffefd +r_6 = 0xffefe +r_7 = 0xffeff + +; clobberable +r8 = 0xffef0 +r9 = 0xffef1 +r10 = 0xffef2 +r11 = 0xffef3 +r12 = 0xffef4 +r13 = 0xffef5 +r14 = 0xffef6 +r15 = 0xffef7 +; preserved +r16 = 0xffee8 +r17 = 0xffee9 +r18 = 0xffeea +r19 = 0xffeeb +r20 = 0xffeec +r21 = 0xffeed +r22 = 0xffeee +r23 = 0xffeef + Index: config/rl78/mulsi3.S =================================================================== --- config/rl78/mulsi3.S (revision 202587) +++ config/rl78/mulsi3.S (working copy) @@ -67,56 +67,52 @@ r23 = 0xffeef .global ___mulsi3 ; (USI a, USI b) ___mulsi3: ;; A is at [sp+4] ;; B is at [sp+8] ;; result is in R8..R11 - movw ax, sp - addw ax, #4 - movw hl, ax - sel rb2 push ax push bc sel rb0 clrw ax movw r8, ax movw r16, ax - movw ax, [hl+6] + movw ax, [sp+14] cmpw ax, #0 bz $1f cmpw ax, #0xffff bnz $2f - movw ax, [hl] + movw ax, [sp+8] sel rb1 subw ax, r_0 sel rb0 br $1f 2: movw bc, ax - movw ax, [hl] + movw ax, [sp+8] cmpw ax, #0 skz call !.Lmul_hi 1: - movw ax, [hl+2] + movw ax, [sp+10] cmpw ax, #0 bz $1f cmpw ax, #0xffff bnz $2f - movw ax, [hl+4] + movw ax, [sp+12] sel rb1 subw ax, r_0 sel rb0 br $1f 2: movw bc, ax - movw ax, [hl+4] + movw ax, [sp+12] cmpw ax, #0 skz call !.Lmul_hi 1: movw ax, r8 @@ -127,15 +123,15 @@ ___mulsi3: ;; now do R16:R8 += op1L * op2L ;; op1 is in AX.0 (needs to shrw) ;; op2 is in BC.2 and BC.1 (bc can shlw/rolcw) ;; res is in AX.2 and AX.1 (needs to addw) - movw ax, [hl] + movw ax, [sp+8] movw r10, ax ; BC.1 - movw ax, [hl+4] + movw ax, [sp+12] cmpw ax, r10 bc $.Lmul_hisi_top movw bc, r10 movw r10, ax movw ax, bc @@ -188,12 +184,19 @@ ___mulsi3: sel rb0 ret ;---------------------------------------------------------------------- + .global ___mulhi3 +___mulhi3: + movw r8, #0 + movw ax, [sp+6] + movw bc, ax + movw ax, [sp+4] + ;; R8 += AX * BC .Lmul_hi: cmpw ax, bc skc xchw ax, bc br $.Lmul_hi_loop @@ -215,21 +218,6 @@ ___mulsi3: bc $.Lmul_hi_top cmpw ax, 
#0 bnz $.Lmul_hi_no_add .Lmul_hi_done: ret - -;---------------------------------------------------------------------- - - .global ___mulhi3 -___mulhi3: - sel rb1 - clrw ax - sel rb0 - movw ax, sp - addw ax, #4 - movw hl, ax - movw ax, [hl+2] - movw bc, ax - movw ax, [hl] - br $.Lmul_hi Index: config/rl78/divmodqi.S =================================================================== --- config/rl78/divmodqi.S (revision 0) +++ config/rl78/divmodqi.S (revision 0) @@ -0,0 +1,310 @@ +/* QImode div/mod functions for the GCC support library for the Renesas RL78 processors. + Copyright (C) 2012,2013 Free Software Foundation, Inc. + Contributed by Red Hat. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/
+
+#ifndef __RL78_G10__
+
+#include "vregs.h"
+
+	;; make_generic WHICH, NEED_RESULT
+	;;
+	;; Expands to the shared unsigned 8-bit shift-and-subtract divider.
+	;;   NEED_RESULT = 1 emits generic_div (quotient built in r8).
+	;;   NEED_RESULT = 0 emits generic_mod (numerator in r8 is reduced
+	;;                   in place to the remainder).
+	;; The caller must load HL first; operands are read at [hl+4]
+	;; (numerator) and [hl+6] (denominator).  B holds the bit mask and
+	;; C the denominator, so BC is clobbered.
+	.macro make_generic which,need_result
+
+	.if \need_result
+	quot = r8
+	num = r10
+	den = r12
+	bit = r14
+	.else
+	num = r8
+	quot = r10
+	den = r12
+	bit = r14
+	.endif
+
+#if 1
+#define bit	b
+#define den	c
+#define bitden	bc
+#endif
+
+	;; numerator < denominator: quotient 0, remainder = numerator
+num_lt_den\which:
+	.if \need_result
+	mov	r8, #0
+	.else
+	mov	a, [hl+4]
+	mov	r8, a
+	.endif
+	ret
+
+	;; numerator == denominator: quotient 1, remainder 0
+num_eq_den\which:
+	.if \need_result
+	mov	r8, #1
+	.else
+	mov	r8, #0
+	.endif
+	ret
+
+	;; denominator == 0: both variants return 0xff
+den_is_zero\which:
+	mov	r8, #0xff
+	ret
+
+	;; These routines leave DE alone - the signed functions use DE
+	;; to store sign information that must remain intact
+
+	.if \need_result
+
+generic_div:
+
+	.else
+
+generic_mod:
+
+	.endif
+
+	;; (quot,rem) = 4[hl] /% 6[hl]
+
+	mov	a, [hl+4] ; num
+	cmp	a, [hl+6] ; den
+	bz	$num_eq_den\which
+	bnh	$num_lt_den\which
+
+	;; copy numerator
+;	mov	a, [hl+4] ; already there from above
+	mov	num, a
+
+	;; copy denominator
+	mov	a, [hl+6]
+	mov	den, a
+
+	cmp0	den
+	bz	$den_is_zero\which
+
+den_not_zero\which:
+	.if \need_result
+	;; zero out quot
+	mov	quot, #0
+	.endif
+
+	;; initialize bit to 1
+	mov	bit, #1
+
+; while (den < num && !(den & (1L << BITS_MINUS_1)))
+
+shift_den_bit\which:
+	;; one iteration of the normalising shift; expanded twice below
+	.macro	sdb_one\which
+	mov	a, den
+	mov1	cy,a.7
+	bc	$enter_main_loop\which
+	cmp	a, num
+	bh	$enter_main_loop\which
+
+	;; den <<= 1
+;	mov	a, den		; already has it from the cmp above
+	shl	a, 1
+	mov	den, a
+
+	;; bit <<= 1
+	shl	bit, 1
+	.endm
+
+	sdb_one\which
+	sdb_one\which
+
+	br	$shift_den_bit\which
+
+main_loop\which:
+
+	;; if (num >= den) (cmp den > num)
+	mov	a, den
+	cmp	a, num
+	bh	$next_loop\which
+
+	;; num -= den
+	mov	a, num
+	sub	a, den
+	mov	num, a
+
+	.if \need_result
+	;; res |= bit
+	mov	a, quot
+	or	a, bit
+	mov	quot, a
+	.endif
+
+next_loop\which:
+
+	;; den, bit >>= 1 (one 16-bit shift of BC does both)
+	movw	ax, bitden
+	shrw	ax, 1
+	movw	bitden, ax
+
+enter_main_loop\which:
+	cmp0	bit
+	bnz	$main_loop\which
+
+main_loop_done\which:
+	ret
+	.endm
+
+	make_generic _d 1
+	make_generic _m 0
+
+;----------------------------------------------------------------------
+
+	;; The unsigned entry points load HL = SP and tail-jump into the
+	;; generic routine, which reads its operands through HL.
+	.global	___udivqi3
+	.type	___udivqi3,@function
+___udivqi3:
+	;; r8 = 4[sp] / 6[sp]
+	movw	hl, sp
+	br	$!generic_div
+	.size	___udivqi3, . - ___udivqi3
+
+
+	.global	___umodqi3
+	.type	___umodqi3,@function
+___umodqi3:
+	;; r8 = 4[sp] % 6[sp]
+	movw	hl, sp
+	br	$!generic_mod
+	.size	___umodqi3, . - ___umodqi3
+
+;----------------------------------------------------------------------
+
+	;; neg_ax: negate, in place, the 8-bit value whose address is in AX.
+	;; Clobbers A and HL.
+	.macro neg_ax
+	movw	hl, ax
+	mov	a, #0
+	sub	a, [hl]
+	mov	[hl], a
+	.endm
+
+	;; Signed divide.  D = 1 if the numerator was negated, E = 1 if the
+	;; denominator was negated; both stack arguments are put back to
+	;; their original values before returning.
+	.global	___divqi3
+	.type	___divqi3,@function
+___divqi3:
+	;; r8 = 4[sp] / 6[sp]
+	movw	hl, sp
+	movw	de, #0
+	mov	a, [sp+4]
+	mov1	cy, a.7
+	bc	$div_signed_num
+	mov	a, [sp+6]
+	mov1	cy, a.7
+	bc	$div_signed_den
+	br	$!generic_div
+
+div_signed_num:
+	;; neg [sp+4]
+	mov	a, #0
+	sub	a, [hl+4]
+	mov	[hl+4], a
+	mov	d, #1
+	mov	a, [sp+6]
+	;; The sign of the denominator is bit 7, as in every other sign
+	;; test in this file; testing bit 6 misclassifies the operand.
+	mov1	cy, a.7
+	bnc	$div_unsigned_den
+div_signed_den:
+	;; neg [sp+6]
+	mov	a, #0
+	sub	a, [hl+6]
+	mov	[hl+6], a
+	mov	e, #1
+div_unsigned_den:
+	;; HL still holds the pre-call SP, so [hl+4]/[hl+6] stay valid
+	call	$!generic_div
+
+	mov	a, d
+	cmp0	a
+	bz	$div_skip_restore_num
+	;; We have to restore the numerator [sp+4]
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+	mov	a, d
+div_skip_restore_num:
+	;; quotient is negative iff exactly one operand was negative
+	xor	a, e
+	bz	$div_no_neg
+	movw	ax, #r8
+	neg_ax
+div_no_neg:
+	mov	a, e
+	cmp0	a
+	bz	$div_skip_restore_den
+	;; We have to restore the denominator [sp+6]
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+div_skip_restore_den:
+	ret
+	.size	___divqi3, . - ___divqi3
+
+
+	;; Signed remainder.  The result takes the sign of the numerator.
+	;; D and E are used exactly as in ___divqi3.
+	.global	___modqi3
+	.type	___modqi3,@function
+___modqi3:
+	;; r8 = 4[sp] % 6[sp]
+	movw	hl, sp
+	movw	de, #0
+	mov	a, [hl+4]
+	mov1	cy, a.7
+	bc	$mod_signed_num
+	mov	a, [hl+6]
+	mov1	cy, a.7
+	bc	$mod_signed_den
+	br	$!generic_mod
+
+mod_signed_num:
+	;; neg [sp+4]
+	mov	a, #0
+	sub	a, [hl+4]
+	mov	[hl+4], a
+	mov	d, #1
+	mov	a, [hl+6]
+	mov1	cy, a.7
+	bnc	$mod_unsigned_den
+mod_signed_den:
+	;; neg [sp+6]
+	mov	a, #0
+	sub	a, [hl+6]
+	mov	[hl+6], a
+	mov	e, #1
+mod_unsigned_den:
+	call	$!generic_mod
+
+	mov	a, d
+	cmp0	a
+	bz	$mod_no_neg
+	;; numerator was negative: negate the remainder in r8
+	mov	a, #0
+	sub	a, r8
+	mov	r8, a
+	;; Also restore numerator
+	movw	ax, sp
+	addw	ax, #4
+	neg_ax
+mod_no_neg:
+	mov	a, e
+	cmp0	a
+	bz	$mod_skip_restore_den
+	;; We have to restore the denominator [sp+6]
+	movw	ax, sp
+	addw	ax, #6
+	neg_ax
+mod_skip_restore_den:
+	ret
+	.size	___modqi3, . - ___modqi3
+
+#endif