diff mbox

[09/13] target-openrisc: Add CPU which neglects Carry and Overflow Flag

Message ID 1383073495-5332-10-git-send-email-sebastian@macke.de
State New
Headers show

Commit Message

Sebastian Macke Oct. 29, 2013, 7:04 p.m. UTC
The carry and overflag and the instructions l.addc and l.addic
are never used in the toolchain. Linux and gcc compiled software
don't need them. To speed up the emulation a cpu was added which
neglects the flags for l.addi, l.add, l.sub and
generates an illegal instruction error for l.addic and l.addc

This cpu violate the specification and will not run through the
test cases but increases the emulation speed between 20% and 200%.
The cpu is activated by the option "-cpu or1200-noflags"

Signed-off-by: Sebastian Macke <sebastian@macke.de>
---
 target-openrisc/cpu.c       | 15 +++++++++++++--
 target-openrisc/cpu.h       |  1 +
 target-openrisc/translate.c | 21 +++++++++++++++++----
 3 files changed, 31 insertions(+), 6 deletions(-)

Comments

Richard Henderson Oct. 30, 2013, 6:14 p.m. UTC | #1
On 10/29/2013 12:04 PM, Sebastian Macke wrote:
> The carry and overflag and the instructions l.addc and l.addic
> are never used in the toolchain. Linux and gcc compiled software
> don't need them.

Really?  That's quite surprising.

> To speed up the emulation a cpu was added which
> neglects the flags for l.addi, l.add, l.sub and
> generates an illegal instruction error for l.addic and l.addc

I'm somewhat shocked that l.addc is never used.  To me that points
to a missed opportunity in the compiler.

It would be much better to simply improve handling of these bits.
In a previous patch set you broke out SR[F] to its own variable;
I suggest that you do the same for SR[CY] and SR[OV].

If you can implement add et al without branches, the TCG optimizer
will be able to do a good job eliminating shadowed computation.

A good example to follow here is the ARM implementation.  Have a
look at the gen_add_CC and gen_sub_CC functions especially.  Note
that the overflow bit is stored in bit 31 of cpu_VF and the other
bits of cpu_VF are ignored.


r~
Sebastian Macke Oct. 30, 2013, 7:22 p.m. UTC | #2
On 30/10/2013 11:14 AM, Richard Henderson wrote:
> On 10/29/2013 12:04 PM, Sebastian Macke wrote:
>> The carry and overflag and the instructions l.addc and l.addic
>> are never used in the toolchain. Linux and gcc compiled software
>> don't need them.
> Really?  That's quite surprising.

Yes, Really.  :)

>> To speed up the emulation a cpu was added which
>> neglects the flags for l.addi, l.add, l.sub and
>> generates an illegal instruction error for l.addic and l.addc
> I'm somewhat shocked that l.addc is never used.  To me that points
> to a missed opportunity in the compiler.
Yes

> It would be much better to simply improve handling of these bits.
> In a previous patch set you broke out SR[F] to its own variable;
> I suggest that you do the same for SR[CY] and SR[OV].
>
> If you can implement add et al without branches, the TCG optimizer
> will be able to do a good job eliminating shadowed computation.
>
> A good example to follow here is the ARM implementation.  Have a
> look at the gen_add_CC and gen_sub_CC functions especially.  Note
> that the overflow bit is stored in bit 31 of cpu_VF and the other
> bits of cpu_VF are ignored.
>
>
> r~

I would like to implement lazy flags or at least change the big bunch of 
code which is there right now.
But for this patchset I want to keep it that way and change the whole 
flag handling in one of the next patches.

There is almost nothing available to test the flags right now. And I 
have to keep an eye on the delayed slot. So this is more work than you 
might expect.
Richard Henderson Oct. 30, 2013, 7:31 p.m. UTC | #3
On 10/30/2013 12:22 PM, Sebastian Macke wrote:
> I would like to implement lazy flags or at least change the big bunch of code
> which is there right now.
> But for this patchset I want to keep it that way and change the whole flag
> handling in one of the next patches.

If you don't want to modernize the flags handling now, fine, but I think adding
this separate cpu definition is a step in the wrong direction.


r~
diff mbox

Patch

diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c
index 09ba728..c80a30c 100644
--- a/target-openrisc/cpu.c
+++ b/target-openrisc/cpu.c
@@ -124,6 +124,16 @@  static void or1200_initfn(Object *obj)
     set_feature(cpu, OPENRISC_FEATURE_OF32S);
 }
 
+static void or1200_noflags_initfn(Object *obj)
+{
+    OpenRISCCPU *cpu = OPENRISC_CPU(obj);
+
+    set_feature(cpu, OPENRISC_FEATURE_OB32S);
+    set_feature(cpu, OPENRISC_FEATURE_OF32S);
+    set_feature(cpu, OPENRISC_FEATURE_NOFLAGS);
+}
+
+
 static void openrisc_any_initfn(Object *obj)
 {
     OpenRISCCPU *cpu = OPENRISC_CPU(obj);
@@ -137,8 +147,9 @@  typedef struct OpenRISCCPUInfo {
 } OpenRISCCPUInfo;
 
 static const OpenRISCCPUInfo openrisc_cpus[] = {
-    { .name = "or1200",      .initfn = or1200_initfn },
-    { .name = "any",         .initfn = openrisc_any_initfn },
+    { .name = "or1200",         .initfn = or1200_initfn },
+    { .name = "or1200-noflags", .initfn = or1200_noflags_initfn },
+    { .name = "any",            .initfn = openrisc_any_initfn },
 };
 
 static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
index 94bbb17..2423fad 100644
--- a/target-openrisc/cpu.h
+++ b/target-openrisc/cpu.h
@@ -204,6 +204,7 @@  enum {
     OPENRISC_FEATURE_OF32S = (1 << 7),
     OPENRISC_FEATURE_OF64S = (1 << 8),
     OPENRISC_FEATURE_OV64S = (1 << 9),
+    OPENRISC_FEATURE_NOFLAGS = (1 << 10),
 };
 
 /* Tick Timer Mode Register */
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index b1f73c4..d926995 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -50,6 +50,7 @@  typedef struct DisasContext {
     TranslationBlock *tb;
     target_ulong pc;
     uint32_t tb_flags, synced_flags, flags;
+    uint32_t cpufeature;
     uint32_t is_jmp;
     uint32_t j_state; /* specifies the jump type*/
     target_ulong j_target; /* target address for jump */
@@ -265,7 +266,9 @@  static void dec_calc(DisasContext *dc, uint32_t insn)
         switch (op1) {
         case 0x00:    /* l.add */
             LOG_DIS("l.add r%d, r%d, r%d\n", rd, ra, rb);
-            {
+            if (dc->cpufeature & OPENRISC_FEATURE_NOFLAGS) {
+                tcg_gen_add_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            } else {
                 int lab = gen_new_label();
                 TCGv_i64 ta = tcg_temp_new_i64();
                 TCGv_i64 tb = tcg_temp_new_i64();
@@ -306,7 +309,9 @@  static void dec_calc(DisasContext *dc, uint32_t insn)
         switch (op1) {
         case 0x00:
             LOG_DIS("l.addc r%d, r%d, r%d\n", rd, ra, rb);
-            {
+            if (dc->cpufeature & OPENRISC_FEATURE_NOFLAGS) {
+                gen_illegal_exception(dc);
+            } else {
                 int lab = gen_new_label();
                 TCGv_i64 ta = tcg_temp_new_i64();
                 TCGv_i64 tb = tcg_temp_new_i64();
@@ -355,7 +360,9 @@  static void dec_calc(DisasContext *dc, uint32_t insn)
         switch (op1) {
         case 0x00:
             LOG_DIS("l.sub r%d, r%d, r%d\n", rd, ra, rb);
-            {
+            if (dc->cpufeature & OPENRISC_FEATURE_NOFLAGS) {
+                tcg_gen_sub_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            } else {
                 int lab = gen_new_label();
                 TCGv_i64 ta = tcg_temp_new_i64();
                 TCGv_i64 tb = tcg_temp_new_i64();
@@ -952,7 +959,9 @@  static void dec_misc(DisasContext *dc, uint32_t insn)
 
     case 0x27:    /* l.addi */
         LOG_DIS("l.addi r%d, r%d, %d\n", rd, ra, I16);
-        {
+        if (dc->cpufeature & OPENRISC_FEATURE_NOFLAGS) {
+            tcg_gen_addi_tl(cpu_R[rd], cpu_R[ra], sign_extend(I16, 16));
+        } else {
             if (I16 == 0) {
                 tcg_gen_mov_tl(cpu_R[rd], cpu_R[ra]);
             } else {
@@ -987,6 +996,9 @@  static void dec_misc(DisasContext *dc, uint32_t insn)
 
     case 0x28:    /* l.addic */
         LOG_DIS("l.addic r%d, r%d, %d\n", rd, ra, I16);
+        if (dc->cpufeature & OPENRISC_FEATURE_NOFLAGS) {
+            gen_illegal_exception(dc);
+        }
         {
             int lab = gen_new_label();
             TCGv_i64 ta = tcg_temp_new_i64();
@@ -1744,6 +1756,7 @@  static inline void gen_intermediate_code_internal(OpenRISCCPU *cpu,
     dc->j_state = JUMP_DYNAMIC;
     dc->j_target = 0;
     dc->flags = cpu->env.cpucfgr;
+    dc->cpufeature = cpu->feature;
     dc->mem_idx = cpu_mmu_index(&cpu->env);
     dc->synced_flags = dc->tb_flags = tb->flags;
     dc->delayed_branch = !!(dc->tb_flags & D_FLAG);