From 11d5859b28727e1dac9d5b15b3027938a7023067 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 2 Apr 2013 22:54:08 -0700 Subject: intel_perf_counters: Add support for Gen7 platforms. We finally received permission to release this; the counters should be properly documented in the Haswell PRMs. Signed-off-by: Kenneth Graunke --- tools/intel_perf_counters.c | 199 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) (limited to 'tools/intel_perf_counters.c') diff --git a/tools/intel_perf_counters.c b/tools/intel_perf_counters.c index b5283614..23d9ed3a 100644 --- a/tools/intel_perf_counters.c +++ b/tools/intel_perf_counters.c @@ -137,6 +137,163 @@ const char *gen6_counter_names[GEN6_COUNTER_COUNT] = { [28] = "SF active and stalled", }; +#define GEN7_COUNTER_COUNT 44 + +/** + * Names for aggregating counters A0-A43. Uninitialized fields are "Reserved." + */ +const char *gen7_counter_names[GEN7_COUNTER_COUNT] = { + /* A0: + * The sum of all cycles on all cores actively executing instructions. + * This does not count the time taken to service Send instructions. + * This time is considered by shader active counters to give the result. + */ + [0] = "Aggregated Core Array Active", + /* A1: + * The sum of all cycles on all cores where the EU is not idle and is + * not actively executing ISA instructions. Generally this means that + * all loaded threads on the EU are stalled on some data dependency, + * but this also includes the time during which the TS is loading the + * thread dispatch header into the EU prior to thread execution and no + * other thread is fully loaded. + */ + [1] = "Aggregated Core Array Stalled", + /* A2: + * Total time in clocks the vertex shader spent active on all cores. + */ + [2] = "Vertex Shader Active Time", + /* A4: + * Total time in clocks the vertex shader spent stalled on all cores - + * and the entire core was stalled as well. 
+ */ + [4] = "Vertex Shader Stall Time - Core Stall", + /* A5: Number of VS threads loaded at any given time in the EUs. */ + [5] = "# VS threads loaded", + /* A7: + * Total time in clocks the Hull shader spent active on all cores. + */ + [7] = "Hull Shader Active Time", + /* A9: + * Total time in clocks the Hull shader spent stalled on all cores - + * and the entire core was stalled as well. + */ + [9] = "Hull Shader Stall Time - Core Stall", + /* A10: Number of HS threads loaded at any given time in the EUs. */ + [10] = "# HS threads loaded", + /* A12: + * Total time in clocks the Domain shader spent active on all cores. + */ + [12] = "Domain Shader Active Time", + /* A14: + * Total time in clocks the domain shader spent stalled on all cores - + * and the entire core was stalled as well. + */ + [14] = "Domain Shader Stall Time - Core Stall", + /* A15: Number of DS threads loaded at any given time in the EUs. */ + [15] = "# DS threads loaded", + /* A17: + * Total time in clocks the compute shader spent active on all cores. + */ + [17] = "Compute Shader Active Time", + /* A19: + * Total time in clocks the compute shader spent stalled on all cores - + * and the entire core was stalled as well. + */ + [19] = "Compute Shader Stall Time - Core Stall", + /* A20: Number of CS threads loaded at any given time in the EUs. */ + [20] = "# CS threads loaded", + /* A22: + * Total time in clocks the geometry shader spent active on all cores. + */ + [22] = "Geometry Shader Active Time", + /* A24: + * Total time in clocks the geometry shader spent stalled on all cores - + * and the entire core was stalled as well. + */ + [24] = "Geometry Shader Stall Time - Core Stall", + /* A25: Number of GS threads loaded at any time in the EUs. */ + [25] = "# GS threads loaded", + /* A27: + * Total time in clocks the pixel shader spent active on all cores. 
+ */ + [27] = "Pixel Shader Active Time", + /* A29: + * Total time in clocks the pixel shader spent stalled on all cores - + * and the entire core was stalled as well. + */ + [29] = "Pixel Shader Stall Time - Core Stall", + /* A30: Number of PS threads loaded at any given time in the EUs. */ + [30] = "# PS threads loaded", + /* A32: Count of pixels that pass the fast check (8x8). */ + [32] = "HiZ Fast Z Test Pixels Passing", + /* A33: Count of pixels that fail the fast check (8x8). */ + [33] = "HiZ Fast Z Test Pixels Failing", + /* A34: Count of pixels passing the slow check (2x2). */ + [34] = "Slow Z Test Pixels Passing", + /* A35: Count of pixels that fail the slow check (2x2). */ + [35] = "Slow Z Test Pixels Failing", + /* A36: Number of pixels/samples killed in the pixel shader. + * Ivybridge/Baytrail Erratum: Count reported is 2X the actual count for + * dual source render target messages i.e. when PS has two output colors. + */ + [36] = "Pixel Kill Count", + /* A37: + * Number of pixels/samples that fail alpha-test. Alpha to coverage + * may have some challenges in per-pixel invocation. + */ + [37] = "Alpha Test Pixels Failed", + /* A38: + * Number of pixels/samples failing stencil test after the pixel shader + * has executed. + */ + [38] = "Post PS Stencil Pixels Failed", + /* A39: + * Number of pixels/samples failing Z test after the pixel shader has + * executed. + */ + [39] = "Post PS Z buffer Pixels Failed", + /* A40: + * Number of render target writes. MRT scenarios will cause this + * counter to increment multiple times. + */ + [40] = "3D/GPGPU Render Target Writes", + /* A41: Render engine is not idle. + * + * GPU Busy aggregate counter doesn't increment under the following + * conditions: + * + * 1. Context Switch in Progress. + * 2. GPU stalled on executing MI_WAIT_FOR_EVENT. + * 3. GPU stalled on executing MI_SEMAPHORE_MBOX. + * 4. RCS idle but other parts of GPU active (e.g. 
only media engines + * active) + */ + [41] = "Render Engine Busy", + /* A42: + * VSunit is stalling VF (upstream unit) and starving HS (downstream + * unit). + */ + [42] = "VS bottleneck", + /* A43: + * GSunit is stalling DS (upstream unit) and starving SOL (downstream + * unit). + */ + [43] = "GS bottleneck", +}; + +/** + * Ivybridge - Counter Select = 101 + * A4 A3 A2 A1 A0 TIMESTAMP ReportID + * A12 A11 A10 A9 A8 A7 A6 A5 + * A20 A19 A18 A17 A16 A15 A14 A13 + * A28 A27 A26 A25 A24 A23 A22 A21 + * A36 A35 A34 A33 A32 A31 A30 A29 + * A44 A43 A42 A41 A40 A39 A38 A37 + * C3 C2 C1 C0 B3 B2 B1 B0 + * C11 C10 C9 C8 C7 C6 C5 C4 + */ +const int gen7_counter_format = 5; /* 0b101 */ + int have_totals = 0; uint32_t *totals; uint32_t *last_counter; @@ -243,6 +400,40 @@ gen6_get_counters(void) drm_intel_bo_unreference(stats_bo); } +static void +gen7_get_counters(void) +{ + int i; + drm_intel_bo *stats_bo; + uint32_t *stats_result; + + stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); + + BEGIN_BATCH(3); + OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2)); + OUT_RELOC(stats_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER); + + drm_intel_bo_map(stats_bo, 0); + stats_result = stats_bo->virtual; + /* skip REPORT_ID, TIMESTAMP */ + stats_result += 3; + for (i = 0; i < GEN7_COUNTER_COUNT; i++) { + /* Ignore "Reserved" counters */ + if (!gen7_counter_names[i]) + continue; + totals[i] += stats_result[i] - last_counter[i]; + last_counter[i] = stats_result[i]; + } + + drm_intel_bo_unmap(stats_bo); + drm_intel_bo_unreference(stats_bo); +} + #define STATS_CHECK_FREQUENCY 100 #define STATS_REPORT_FREQUENCY 2 @@ -279,6 +470,11 @@ main(int argc, char **argv) counter_count = GEN6_COUNTER_COUNT; counter_format = gen6_counter_format; get_counters = gen6_get_counters; + } else if (IS_GEN7(devid)) { + counter_name = gen7_counter_names; + counter_count = GEN7_COUNTER_COUNT; + 
counter_format = gen7_counter_format; + get_counters = gen7_get_counters; } else { printf("This tool is not yet supported on your platform.\n"); abort(); @@ -304,6 +500,9 @@ main(int argc, char **argv) if (l % (STATS_CHECK_FREQUENCY / STATS_REPORT_FREQUENCY) == 0) { if (have_totals) { for (i = 0; i < counter_count; i++) { + /* Ignore "Reserved" counters */ + if (!counter_name[i]) + continue; printf("%s: %u\n", counter_name[i], totals[i]); totals[i] = 0; -- cgit v1.2.3