From patchwork Mon Nov 24 23:26:41 2008 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Carl Love X-Patchwork-Id: 10541 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from ozlabs.org (localhost [127.0.0.1]) by ozlabs.org (Postfix) with ESMTP id 1FBA5DE2DB for ; Tue, 25 Nov 2008 10:28:55 +1100 (EST) X-Original-To: cbe-oss-dev@ozlabs.org Delivered-To: cbe-oss-dev@ozlabs.org Received: from e34.co.us.ibm.com (e34.co.us.ibm.com [32.97.110.152]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client CN "e34.co.us.ibm.com", Issuer "Equifax" (verified OK)) by ozlabs.org (Postfix) with ESMTPS id DE757DDEDF; Tue, 25 Nov 2008 10:27:40 +1100 (EST) Received: from d03relay02.boulder.ibm.com (d03relay02.boulder.ibm.com [9.17.195.227]) by e34.co.us.ibm.com (8.13.1/8.13.1) with ESMTP id mAONR2GY030259; Mon, 24 Nov 2008 16:27:02 -0700 Received: from d03av04.boulder.ibm.com (d03av04.boulder.ibm.com [9.17.195.170]) by d03relay02.boulder.ibm.com (8.13.8/8.13.8/NCO v9.1) with ESMTP id mAONRbDb161018; Mon, 24 Nov 2008 16:27:37 -0700 Received: from d03av04.boulder.ibm.com (loopback [127.0.0.1]) by d03av04.boulder.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id mAONRa7U000652; Mon, 24 Nov 2008 16:27:37 -0700 Received: from [9.47.21.78] (carll-linux-desktop.beaverton.ibm.com [9.47.21.78]) by d03av04.boulder.ibm.com (8.12.11.20060308/8.12.11) with ESMTP id mAONRXbN000574 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO); Mon, 24 Nov 2008 16:27:35 -0700 From: Carl Love To: Arnd Bergmann , linux-kernel , linuxppc-dev@ozlabs.org, oprofile-list@lists.sourceforge.net, cel , cbe-oss-dev Date: Mon, 24 Nov 2008 15:26:41 -0800 Message-Id: <1227569201.6509.214.camel@carll-linux-desktop> Mime-Version: 1.0 X-Mailer: Evolution 2.12.1 Subject: [Cbe-oss-dev] [Patch 2/3] OProfile SPU event profiling support for IBM Cell processor X-BeenThere: cbe-oss-dev@ozlabs.org X-Mailman-Version: 2.1.11 Precedence: list List-Id: Discussion about Open Source Software for the Cell Broadband Engine List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: cbe-oss-dev-bounces+patchwork-incoming=ozlabs.org@ozlabs.org Errors-To: cbe-oss-dev-bounces+patchwork-incoming=ozlabs.org@ozlabs.org This patch basically rearranges the code a bit to make it easier to just add the needed SPU event based profiling routines. The second kernel patch contains the new spu event based profiling code. Signed-off-by: Carl Love Acked-by: Arnd Bergmann Index: Cell_kernel_11_10_2008/arch/powerpc/oprofile/op_model_cell.c =================================================================== --- Cell_kernel_11_10_2008.orig/arch/powerpc/oprofile/op_model_cell.c +++ Cell_kernel_11_10_2008/arch/powerpc/oprofile/op_model_cell.c @@ -40,14 +40,11 @@ #include "../platforms/cell/interrupt.h" #include "cell/pr_util.h" -static void cell_global_stop_spu(void); +static void cell_global_stop_spu_cycles(void); -/* - * spu_cycle_reset is the number of cycles between samples. - * This variable is used for SPU profiling and should ONLY be set - * at the beginning of cell_reg_setup; otherwise, it's read-only. - */ -static unsigned int spu_cycle_reset; +#define PPU_PROFILING 0 +#define SPU_PROFILING_CYCLES 1 +#define SPU_PROFILING_EVENTS 2 #define NUM_SPUS_PER_NODE 8 #define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ @@ -66,6 +63,15 @@ static unsigned int spu_cycle_reset; #define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ +/* + * spu_cycle_reset is the number of cycles between samples. + * This variable is used for SPU profiling and should ONLY be set + * at the beginning of cell_reg_setup; otherwise, it's read-only. + */ +static unsigned int spu_cycle_reset; +static unsigned int profiling_mode; + + struct pmc_cntrl_data { unsigned long vcntr; unsigned long evnts; @@ -541,44 +547,32 @@ static void start_virt_cntrs(void) add_timer(&timer_virt_cntr); } -/* This function is called once for all cpus combined */ -static int cell_reg_setup(struct op_counter_config *ctr, +static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr, struct op_system_config *sys, int num_ctrs) { int i, j, cpu; - spu_cycle_reset = 0; - - if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { - spu_cycle_reset = ctr[0].count; - - /* - * Each node will need to make the rtas call to start - * and stop SPU profiling. Get the token once and store it. - */ - spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); - - if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { - printk(KERN_ERR - "%s: rtas token ibm,cbe-spu-perftools unknown\n", - __func__); - return -EIO; - } - } - pm_rtas_token = rtas_token("ibm,cbe-perftools"); + spu_cycle_reset = ctr[0].count; /* - * For all events excetp PPU CYCLEs, each node will need to make - * the rtas cbe-perftools call to setup and reset the debug bus. - * Make the token lookup call once and store it in the global - * variable pm_rtas_token. + * Each node will need to make the rtas call to start + * and stop SPU profiling. Get the token once and store it. */ - if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { + spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); + + if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { printk(KERN_ERR - "%s: rtas token ibm,cbe-perftools unknown\n", + "%s: rtas token ibm,cbe-spu-perftools unknown\n", __func__); return -EIO; } + return 0; +} + +static int cell_reg_setup_ppu(struct op_counter_config *ctr, + struct op_system_config *sys, int num_ctrs) +{ + int i, j, cpu; num_counters = num_ctrs; @@ -665,6 +659,42 @@ static int cell_reg_setup(struct op_coun } +/* This function is called once for all cpus combined */ +static int cell_reg_setup(struct op_counter_config *ctr, + struct op_system_config *sys, int num_ctrs) +{ + int ret; + + spu_cycle_reset = 0; + + + /* + * For all events except PPU CYCLEs, each node will need to make + * the rtas cbe-perftools call to setup and reset the debug bus. + * Make the token lookup call once and store it in the global + * variable pm_rtas_token. + */ + pm_rtas_token = rtas_token("ibm,cbe-perftools"); + + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { + printk(KERN_ERR + "%s: rtas token ibm,cbe-perftools unknown\n", + __func__); + return -EIO; + } + + if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { + profiling_mode = SPU_PROFILING_CYCLES; + ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs); + } else { + profiling_mode = PPU_PROFILING; + ret = cell_reg_setup_ppu(ctr, sys, num_ctrs); + } + + return ret; +} + + /* This function is called once for each cpu */ static int cell_cpu_setup(struct op_counter_config *cntr) @@ -673,7 +703,11 @@ static int cell_cpu_setup(struct op_coun u32 num_enabled = 0; int i; - if (spu_cycle_reset) + /* Cycle based SPU profiling does not use the performance + * counters. The trace array is configured to collect + * the data. + */ + if (profiling_mode == SPU_PROFILING_CYCLES) return 0; /* There is one performance monitor per processor chip (i.e. node), @@ -686,7 +720,6 @@ static int cell_cpu_setup(struct op_coun cbe_disable_pm(cpu); cbe_disable_pm_interrupts(cpu); - cbe_write_pm(cpu, pm_interval, 0); cbe_write_pm(cpu, pm_start_stop, 0); cbe_write_pm(cpu, group_control, pm_regs.group_control); cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); @@ -885,7 +918,7 @@ static struct notifier_block cpu_freq_no }; #endif -static int cell_global_start_spu(struct op_counter_config *ctr) +static int cell_global_start_spu_cycles(struct op_counter_config *ctr) { int subfunc; unsigned int lfsr_value; @@ -970,7 +1003,7 @@ static int cell_global_start_spu(struct return 0; out_stop: - cell_global_stop_spu(); /* clean up the PMU/debug bus */ + cell_global_stop_spu_cycles(); /* clean up the PMU/debug bus */ out: return rtas_error; } @@ -1024,8 +1057,8 @@ static int cell_global_start_ppu(struct static int cell_global_start(struct op_counter_config *ctr) { - if (spu_cycle_reset) - return cell_global_start_spu(ctr); + if (profiling_mode == SPU_PROFILING_CYCLES) + return cell_global_start_spu_cycles(ctr); else return cell_global_start_ppu(ctr); } @@ -1038,7 +1071,7 @@ static int cell_global_start(struct op_c * to enable the performance counters and debug bus will work even if * the hardware was not cleanly reset. */ -static void cell_global_stop_spu(void) +static void cell_global_stop_spu_cycles(void) { int subfunc, rtn_value; unsigned int lfsr_value; @@ -1075,7 +1108,8 @@ static void cell_global_stop_spu(void) pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); } - stop_spu_profiling(); + if (profiling_mode == SPU_PROFILING_CYCLES) + stop_spu_profiling(); } static void cell_global_stop_ppu(void) @@ -1109,14 +1143,14 @@ static void cell_global_stop_ppu(void) static void cell_global_stop(void) { - if (spu_cycle_reset) - cell_global_stop_spu(); - else + if (profiling_mode == PPU_PROFILING) cell_global_stop_ppu(); + else + cell_global_stop_spu_cycles(); } -static void cell_handle_interrupt(struct pt_regs *regs, - struct op_counter_config *ctr) +static void cell_handle_interrupt_ppu(struct pt_regs *regs, + struct op_counter_config *ctr) { u32 cpu; u64 pc; @@ -1188,6 +1222,13 @@ static void cell_handle_interrupt(struct spin_unlock_irqrestore(&virt_cntr_lock, flags); } +static void cell_handle_interrupt(struct pt_regs *regs, + struct op_counter_config *ctr) +{ + if (profiling_mode == PPU_PROFILING) + cell_handle_interrupt_ppu(regs, ctr); +} + /* * This function is called from the generic OProfile * driver. When profiling PPUs, we need to do the @@ -1195,7 +1236,8 @@ static void cell_handle_interrupt(struct */ static int cell_sync_start(void) { - if (spu_cycle_reset) + if ((profiling_mode == SPU_PROFILING_CYCLES) || + (profiling_mode == SPU_PROFILING_EVENTS)) return spu_sync_start(); else return DO_GENERIC_SYNC; @@ -1203,7 +1245,8 @@ static int cell_sync_start(void) static int cell_sync_stop(void) { - if (spu_cycle_reset) + if ((profiling_mode == SPU_PROFILING_CYCLES) || + (profiling_mode == SPU_PROFILING_EVENTS)) return spu_sync_stop(); else return 1;