diff options
author | Kenneth Graunke <kenneth@whitecape.org> | 2013-03-26 22:06:39 -0700 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-03-27 12:28:52 +0100 |
commit | 43a086269795c5b41bdab0dc2c456be0de97e762 (patch) | |
tree | 046289dcf59a14cf1419ae1dd2578e2680320bc5 /tools | |
parent | 0811556747ce861730d8eb310860080b8840e202 (diff) |
intel_perf_counters: Add support for Sandybridge.
While the Sandybridge PRM doesn't have any documentation on the GPU's
performance counters, a lot of information can be gleaned from the older
Ironlake PRM. Oddly, none of the information documented there actually
appears to apply to Ironlake. However, it apparently works just great
on Sandybridge.
Since this information has all been publicly available on the internet
for around three years, we can use it.
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/intel_perf_counters.c | 146 |
1 files changed, 146 insertions, 0 deletions
diff --git a/tools/intel_perf_counters.c b/tools/intel_perf_counters.c index fd268b12..b5283614 100644 --- a/tools/intel_perf_counters.c +++ b/tools/intel_perf_counters.c @@ -22,9 +22,21 @@ * * Authors: * Eric Anholt <eric@anholt.net> + * Kenneth Graunke <kenneth@whitecape.org> + * + * While documentation for performance counters is suspiciously missing from the + * Sandybridge PRM, they were documented in Volume 1 Part 3 of the Ironlake PRM. + * + * A lot of the Ironlake PRM actually unintentionally documents Sandybridge + * due to mistakes made when updating the documentation for Gen6+. Many of + * these mislabeled sections carried forward to the public documentation. + * + * The Ironlake PRMs have been publicly available since 2010 and are online at: + * https://01.org/linuxgraphics/documentation/2010-intel-core-processor-family */ #include <unistd.h> +#include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <err.h> @@ -71,6 +83,60 @@ const char *gen5_counter_names[GEN5_COUNTER_COUNT] = { "cycles any EU is stalled for math", }; +#define GEN6_COUNTER_COUNT 29 + +/** + * Sandybridge: Counter Select = 001 + * A0 A1 A2 A3 A4 TIMESTAMP RPT_ID + * A5 A6 A7 A8 A9 A10 A11 A12 + * A13 A14 A15 A16 A17 A18 A19 A20 + * A21 A22 A23 A24 A25 A26 A27 A28 + */ +const int gen6_counter_format = 1; + +/** + * Names for aggregating counters A0-A28. + * + * While the Ironlake PRM clearly documents that there are 29 counters (A0-A28), + * it only lists the names for 28 of them; one is missing. However, careful + * examination reveals a pattern: there are five GS counters (Active, Stall, + * Core Stall, # threads loaded, and ready but not running time). There are + * also five PS counters, in the same order. But there are only four VS + * counters listed - the number of VS threads loaded is missing. Presumably, + * it exists and is counter 5, and the rest are shifted over one place. + */ +const char *gen6_counter_names[GEN6_COUNTER_COUNT] = { + [0] = "Aggregated Core Array Active", + [1] = "Aggregated Core Array Stalled", + [2] = "Vertex Shader Active Time", + [3] = "Vertex Shader Stall Time", + [4] = "Vertex Shader Stall Time - Core Stall", + [5] = "# VS threads loaded", + [6] = "Vertex Shader Ready but not running time", + [7] = "Geometry Shader Active Time", + [8] = "Geometry Shader Stall Time", + [9] = "Geometry Shader Stall Time - Core Stall", + [10] = "# GS threads loaded", + [11] = "Geometry Shader ready but not running Time", + [12] = "Pixel Shader Active Time", + [13] = "Pixel Shader Stall Time", + [14] = "Pixel Shader Stall Time - Core Stall", + [15] = "# PS threads loaded", + [16] = "Pixel Shader ready but not running Time", + [17] = "Early Z Test Pixels Passing", + [18] = "Early Z Test Pixels Failing", + [19] = "Early Stencil Test Pixels Passing", + [20] = "Early Stencil Test Pixels Failing", + [21] = "Pixel Kill Count", + [22] = "Alpha Test Pixels Failed", + [23] = "Post PS Stencil Pixels Failed", + [24] = "Post PS Z buffer Pixels Failed", + [25] = "Pixels/samples Written in the frame buffer", + [26] = "GPU Busy", + [27] = "CL active and not stalled", + [28] = "SF active and stalled", +}; + int have_totals = 0; uint32_t *totals; uint32_t *last_counter; @@ -85,6 +151,20 @@ struct intel_batchbuffer *batch; #define MI_COUNTER_ADDRESS_GTT (1 << 0) /* DW2: report ID */ +/** + * According to the Sandybridge PRM, Volume 1, Part 1, page 48, + * MI_REPORT_PERF_COUNT is now opcode 0x28. The Ironlake PRM, Volume 1, + * Part 3 details how it works. + */ +/* DW0 */ +#define GEN6_MI_REPORT_PERF_COUNT (0x28 << 23) +/* DW1 and 2 are the same as above */ + +/* OACONTROL exists on Gen6+ but is documented in the Ironlake PRM */ +#define OACONTROL 0x2360 +# define OACONTROL_COUNTER_SELECT_SHIFT 2 +# define PERFORMANCE_COUNTER_ENABLE (1 << 0) + static void gen5_get_counters(void) { @@ -124,6 +204,45 @@ gen5_get_counters(void) drm_intel_bo_unreference(stats_bo); } +static void +gen6_get_counters(void) +{ + int i; + drm_intel_bo *stats_bo; + uint32_t *stats_result; + + /* Map from counter names to their index in the buffer object */ + static const int buffer_index[GEN6_COUNTER_COUNT] = + { + 7, 6, 5, 4, 3, + 15, 14, 13, 12, 11, 10, 9, 8, + 23, 22, 21, 20, 19, 18, 17, 16, + 31, 30, 29, 28, 27, 26, 25, 24, + }; + + stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); + + BEGIN_BATCH(3); + OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2)); + OUT_RELOC(stats_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + MI_COUNTER_ADDRESS_GTT); + OUT_BATCH(0); + ADVANCE_BATCH(); + + intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER); + + drm_intel_bo_map(stats_bo, 0); + stats_result = stats_bo->virtual; + for (i = 0; i < GEN6_COUNTER_COUNT; i++) { + totals[i] += stats_result[buffer_index[i]] - last_counter[i]; + last_counter[i] = stats_result[buffer_index[i]]; + } + + drm_intel_bo_unmap(stats_bo); + drm_intel_bo_unreference(stats_bo); +} + #define STATS_CHECK_FREQUENCY 100 #define STATS_REPORT_FREQUENCY 2 @@ -131,6 +250,7 @@ int main(int argc, char **argv) { uint32_t devid; + int counter_format; int counter_count; const char **counter_name; void (*get_counters)(void); @@ -138,6 +258,7 @@ main(int argc, char **argv) char clear_screen[] = {0x1b, '[', 'H', 0x1b, '[', 'J', 0x0}; + bool oacontrol = true; int fd; int l; @@ -152,10 +273,27 @@ main(int argc, char **argv) counter_name = gen5_counter_names; counter_count = GEN5_COUNTER_COUNT; get_counters = gen5_get_counters; + oacontrol = false; + } else if (IS_GEN6(devid)) { + counter_name = gen6_counter_names; + counter_count = GEN6_COUNTER_COUNT; + counter_format = gen6_counter_format; + get_counters = gen6_get_counters; } else { printf("This tool is not yet supported on your platform.\n"); abort(); } + + if (oacontrol) { + /* Forcewake */ + intel_register_access_init(intel_get_pci_device(), 0); + + /* Enable performance counters */ + intel_register_write(OACONTROL, + counter_format << OACONTROL_COUNTER_SELECT_SHIFT | + PERFORMANCE_COUNTER_ENABLE); + } + totals = calloc(counter_count, sizeof(uint32_t)); last_counter = calloc(counter_count, sizeof(uint32_t)); @@ -180,6 +318,14 @@ main(int argc, char **argv) } } + if (oacontrol) { + /* Disable performance counters */ + intel_register_write(OACONTROL, 0); + + /* Forcewake */ + intel_register_access_fini(); + } + free(totals); free(last_counter); |