@@ -37,6 +37,8 @@ struct power_pmu {
void (*config_bhrb)(u64 pmu_bhrb_filter);
void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
int (*limited_pmc_event)(u64 event_id);
+ void (*get_mem_data_src)(union perf_mem_data_src *dsrc,
+ struct pt_regs *regs);
u32 flags;
const struct attribute_group **attr_groups;
int n_generic;
@@ -1627,6 +1627,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
data.br_stack = &cpuhw->bhrb_stack;
}
+ if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+ ppmu->get_mem_data_src)
+ ppmu->get_mem_data_src(&data.data_src, regs);
+
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
}
@@ -209,6 +209,69 @@ static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[])
return nalt;
}
+#define POWER7_MMCRA_MDTLB_MISS (0x1LL << 50) /* gates decoding of the source field */
+#define POWER7_MMCRA_MDTLB_SRC_SHIFT 46 /* position of the 4-bit source field */
+#define POWER7_MMCRA_MDTLB_SRC_MASK (0xFLL << POWER7_MMCRA_MDTLB_SRC_SHIFT) /* the 4-bit field, unshifted */
+
+/*
+ * Map MDTLB_SRC fields to the Linux memory hierarchy levels.
+ *
+ * Bits 14..17 in the MMCRA (IBM numbering, bit 0 is the MSB) indicate the
+ * source of a marked-data-TLB miss, with each of the 16 possible values
+ * referring to a specific source. E.g. if the 4 bits have the value 1
+ * (0b0001), the mdtlb entry was found in the local L3 cache.
+ *
+ * We use the table, mdtlb_src_map, to map the value in this field, to
+ * PERF_MEM_TLB_L3, the arch-neutral representation of TLB L3 cache.
+ *
+ * Architecture neutral to Power7 hierarchy levels:
+ * 1-hop = different core on same chip (L2.1 or L3.1)
+ * 2-hops = remote (different chip on same node, RL2L3, RMEM)
+ * 3-hops = distant (different node, DL2L3, DMEM)
+ */
+#define P(a, b) PERF_MEM_S(a, b) /* shorthand for the table entries below */
+#define TD(a, b) (P(TLB, CCE_DIRTY) | P(a, b)) /* P(a, b) OR-ed with the TLB CCE_DIRTY bit */
+
+static const u64 mdtlb_src_map[16] = { /* read-only; one slot per value of the 4-bit source field */
+ P(TLB, L2), /* 00: FROM_L2 */
+ P(TLB, L3), /* 01: FROM_L3 */
+
+ P(TLB, NA), /* 02: Reserved */
+ P(TLB, NA), /* 03: Reserved */
+
+ P(TLB, REM_L2_CCE1), /* 04: FROM_L2.1_SHR */
+ TD(TLB, REM_L2_CCE1), /* 05: FROM_L2.1_MOD */
+
+ P(TLB, REM_L3_CCE1), /* 06: FROM_L3.1_SHR */
+ TD(TLB, REM_L3_CCE1), /* 07: FROM_L3.1_MOD */
+
+ P(TLB, REM_CCE2), /* 08: FROM_RL2L3_SHR */
+ TD(TLB, REM_CCE2), /* 09: FROM_RL2L3_MOD */
+
+ P(TLB, REM_CCE3), /* 10: FROM_DL2L3_SHR */
+ TD(TLB, REM_CCE3), /* 11: FROM_DL2L3_MOD */
+
+ P(TLB, LOC_RAM), /* 12: FROM_LMEM */
+ P(TLB, REM_RAM2), /* 13: FROM_RMEM */
+ P(TLB, REM_RAM3), /* 14: FROM_DMEM */
+
+ P(TLB, NA), /* 15: Reserved */
+};
+
+/* Fold the POWER7 marked-data-TLB miss source, if any, into dsrc->val. */
+static void power7_get_mem_data_src(union perf_mem_data_src *dsrc,
+ struct pt_regs *regs)
+{
+ u64 mmcra = regs->dsisr; /* value this code treats as MMCRA */
+
+ /* No marked-data-TLB miss flagged: leave *dsrc untouched. */
+ if (!(mmcra & POWER7_MMCRA_MDTLB_MISS))
+ return;
+
+ dsrc->val |= mdtlb_src_map[(mmcra & POWER7_MMCRA_MDTLB_SRC_MASK) >>
+ POWER7_MMCRA_MDTLB_SRC_SHIFT];
+}
+
/*
* Returns 1 if event counts things relating to marked instructions
* and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
@@ -447,6 +510,7 @@ static struct power_pmu power7_pmu = {
.compute_mmcr = power7_compute_mmcr,
.get_constraint = power7_get_constraint,
.get_alternatives = power7_get_alternatives,
+ .get_mem_data_src = power7_get_mem_data_src,
.disable_pmc = power7_disable_pmc,
.flags = PPMU_ALT_SIPR,
.attr_groups = power7_pmu_attr_groups,
@@ -626,8 +626,8 @@ union perf_mem_data_src {
mem_lvl:14, /* memory hierarchy level */
mem_snoop:5, /* snoop mode */
mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_rsvd:31;
+ mem_dtlb:17, /* tlb access */
+ mem_rsvd:21;
};
};
@@ -678,6 +678,16 @@ union perf_mem_data_src {
#define PERF_MEM_TLB_L2 0x10 /* L2 */
#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
+#define PERF_MEM_TLB_L3 0x80 /* L3 */
+#define PERF_MEM_TLB_REM_L2_CCE1 0x100 /* Remote L2 cache (1 hop) */
+#define PERF_MEM_TLB_REM_L3_CCE1 0x200 /* Remote L3 cache (1 hop) */
+#define PERF_MEM_TLB_REM_CCE2 0x400 /* Remote cache (2 hops) */
+#define PERF_MEM_TLB_REM_CCE3 0x800 /* Remote cache (3 hops) */
+#define PERF_MEM_TLB_LOC_RAM 0x1000 /* Local DRAM */
+#define PERF_MEM_TLB_REM_RAM1 0x2000 /* Remote DRAM (1 hop) */
+#define PERF_MEM_TLB_REM_RAM2 0x4000 /* Remote DRAM (2 hops) */
+#define PERF_MEM_TLB_REM_RAM3 0x8000 /* Remote DRAM (3 hops) */
+#define PERF_MEM_TLB_CCE_DIRTY 0x10000 /* Remote cache entry hit, but dirty */
#define PERF_MEM_S(a, s) \
(((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)