summary refs log tree commit diff
path: root/tools/intel_perf_counters.c
diff options
context:
space:
mode:
author Kenneth Graunke <kenneth@whitecape.org> 2013-04-02 22:54:08 -0700
committer Kenneth Graunke <kenneth@whitecape.org> 2013-10-03 15:27:52 -0700
commit 11d5859b28727e1dac9d5b15b3027938a7023067 (patch)
tree c5d7acc37ef29309d1254bcaee1513d6c328ca92 /tools/intel_perf_counters.c
parent 16b61932bb1b54a21b67b6c209ba405f5a36f174 (diff)
intel_perf_counters: Add support for Gen7 platforms.
We finally received permission to release this; the counters should be properly documented in the Haswell PRMs.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'tools/intel_perf_counters.c')
-rw-r--r-- tools/intel_perf_counters.c 199
1 files changed, 199 insertions, 0 deletions
diff --git a/tools/intel_perf_counters.c b/tools/intel_perf_counters.c
index b5283614..23d9ed3a 100644
--- a/tools/intel_perf_counters.c
+++ b/tools/intel_perf_counters.c
@@ -137,6 +137,163 @@ const char *gen6_counter_names[GEN6_COUNTER_COUNT] = {
[28] = "SF active and stalled",
};
+#define GEN7_COUNTER_COUNT 44 /* entries in gen7_counter_names: A0-A43 */
+
+/**
+ * Names for aggregating counters A0-A43. Uninitialized (NULL) fields are "Reserved."
+ */
+const char *gen7_counter_names[GEN7_COUNTER_COUNT] = {
+ /* A0:
+ * The sum of all cycles on all cores actively executing instructions.
+ * This does not count the time taken to service Send instructions.
+ * This time is considered by shader active counters to give the result.
+ */
+ [0] = "Aggregated Core Array Active",
+ /* A1:
+ * The sum of all cycles on all cores where the EU is not idle and is
+ * not actively executing ISA instructions. Generally this means that
+ * all loaded threads on the EU are stalled on some data dependency,
+ * but this also includes the time during which the TS is loading the
+ * thread dispatch header into the EU prior to thread execution and no
+ * other thread is fully loaded.
+ */
+ [1] = "Aggregated Core Array Stalled",
+ /* A2:
+ * Total time in clocks the vertex shader spent active on all cores.
+ */
+ [2] = "Vertex Shader Active Time",
+ /* A4:
+ * Total time in clocks the vertex shader spent stalled on all cores -
+ * and the entire core was stalled as well.
+ */
+ [4] = "Vertex Shader Stall Time - Core Stall",
+ /* A5: Number of VS threads loaded at any given time in the EUs. */
+ [5] = "# VS threads loaded",
+ /* A7:
+ * Total time in clocks the Hull shader spent active on all cores.
+ */
+ [7] = "Hull Shader Active Time",
+ /* A9:
+ * Total time in clocks the Hull shader spent stalled on all cores -
+ * and the entire core was stalled as well.
+ */
+ [9] = "Hull Shader Stall Time - Core Stall",
+ /* A10: Number of HS threads loaded at any given time in the EUs. */
+ [10] = "# HS threads loaded",
+ /* A12:
+ * Total time in clocks the Domain shader spent active on all cores.
+ */
+ [12] = "Domain Shader Active Time",
+ /* A14:
+ * Total time in clocks the domain shader spent stalled on all cores -
+ * and the entire core was stalled as well.
+ */
+ [14] = "Domain Shader Stall Time - Core Stall",
+ /* A15: Number of DS threads loaded at any given time in the EUs. */
+ [15] = "# DS threads loaded",
+ /* A17:
+ * Total time in clocks the compute shader spent active on all cores.
+ */
+ [17] = "Compute Shader Active Time",
+ /* A19:
+ * Total time in clocks the compute shader spent stalled on all cores -
+ * and the entire core was stalled as well.
+ */
+ [19] = "Compute Shader Stall Time - Core Stall",
+ /* A20: Number of CS threads loaded at any given time in the EUs. */
+ [20] = "# CS threads loaded",
+ /* A22:
+ * Total time in clocks the geometry shader spent active on all cores.
+ */
+ [22] = "Geometry Shader Active Time",
+ /* A24:
+ * Total time in clocks the geometry shader spent stalled on all cores -
+ * and the entire core was stalled as well.
+ */
+ [24] = "Geometry Shader Stall Time - Core Stall",
+ /* A25: Number of GS threads loaded at any given time in the EUs. */
+ [25] = "# GS threads loaded",
+ /* A27:
+ * Total time in clocks the pixel shader spent active on all cores.
+ */
+ [27] = "Pixel Shader Active Time",
+ /* A29:
+ * Total time in clocks the pixel shader spent stalled on all cores -
+ * and the entire core was stalled as well.
+ */
+ [29] = "Pixel Shader Stall Time - Core Stall",
+ /* A30: Number of PS threads loaded at any given time in the EUs. */
+ [30] = "# PS threads loaded",
+ /* A32: Count of pixels that pass the fast check (8x8). */
+ [32] = "HiZ Fast Z Test Pixels Passing",
+ /* A33: Count of pixels that fail the fast check (8x8). */
+ [33] = "HiZ Fast Z Test Pixels Failing",
+ /* A34: Count of pixels passing the slow check (2x2). */
+ [34] = "Slow Z Test Pixels Passing",
+ /* A35: Count of pixels that fail the slow check (2x2). */
+ [35] = "Slow Z Test Pixels Failing",
+ /* A36: Number of pixels/samples killed in the pixel shader.
+ * Ivybridge/Baytrail Erratum: Count reported is 2X the actual count for
+ * dual source render target messages i.e. when PS has two output colors.
+ */
+ [36] = "Pixel Kill Count",
+ /* A37:
+ * Number of pixels/samples that fail alpha-test. Alpha to coverage
+ * may have some challenges in per-pixel invocation.
+ */
+ [37] = "Alpha Test Pixels Failed",
+ /* A38:
+ * Number of pixels/samples failing stencil test after the pixel shader
+ * has executed.
+ */
+ [38] = "Post PS Stencil Pixels Failed",
+ /* A39:
+ * Number of pixels/samples that fail the Z test after the pixel shader
+ * has executed.
+ */
+ [39] = "Post PS Z buffer Pixels Failed",
+ /* A40:
+ * Number of render target writes. MRT scenarios will cause this
+ * counter to increment multiple times.
+ */
+ [40] = "3D/GPGPU Render Target Writes",
+ /* A41: Render engine is not idle.
+ *
+ * GPU Busy aggregate counter doesn't increment under the following
+ * conditions:
+ *
+ * 1. Context Switch in Progress.
+ * 2. GPU stalled on executing MI_WAIT_FOR_EVENT.
+ * 3. GPU stalled on executing MI_SEMAPHORE_MBOX.
+ * 4. RCS idle but other parts of GPU active (e.g. only media engines
+ * active)
+ */
+ [41] = "Render Engine Busy",
+ /* A42:
+ * VSunit is stalling VF (upstream unit) and starving HS (downstream
+ * unit).
+ */
+ [42] = "VS bottleneck",
+ /* A43:
+ * GSunit is stalling DS (upstream unit) and starving SOL (downstream
+ * unit).
+ */
+ [43] = "GS bottleneck",
+};
+
+/**
+ * Ivybridge - Counter Select = 101 (binary). Fields listed for this format:
+ * A4 A3 A2 A1 A0 TIMESTAMP ReportID
+ * A12 A11 A10 A9 A8 A7 A6 A5
+ * A20 A19 A18 A17 A16 A15 A14 A13
+ * A28 A27 A26 A25 A24 A23 A22 A21
+ * A36 A35 A34 A33 A32 A31 A30 A29
+ * A44 A43 A42 A41 A40 A39 A38 A37
+ * C3 C2 C1 C0 B3 B2 B1 B0
+ * C11 C10 C9 C8 C7 C6 C5 C4
+ */
+const int gen7_counter_format = 5; /* 0b101, the Counter Select value above */
+
int have_totals = 0;
uint32_t *totals;
uint32_t *last_counter;
@@ -243,6 +400,40 @@ gen6_get_counters(void)
drm_intel_bo_unreference(stats_bo);
}
+static void
+gen7_get_counters(void) /* take one sample; add per-counter deltas to totals[] */
+{
+ int i;
+ drm_intel_bo *stats_bo;
+ uint32_t *stats_result;
+
+ stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096); /* NOTE(review): result is not NULL-checked */
+
+ BEGIN_BATCH(3);
+ OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2)); /* the GEN6_ command define is used on this Gen7 path */
+ OUT_RELOC(stats_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
+
+ drm_intel_bo_map(stats_bo, 0); /* NOTE(review): return value is ignored */
+ stats_result = stats_bo->virtual;
+ /* skip REPORT_ID, TIMESTAMP: counters are read starting at the fourth 32-bit word */
+ stats_result += 3;
+ for (i = 0; i < GEN7_COUNTER_COUNT; i++) {
+ /* Ignore "Reserved" counters (entries without a name) */
+ if (!gen7_counter_names[i])
+ continue;
+ totals[i] += stats_result[i] - last_counter[i]; /* change since the previous sample */
+ last_counter[i] = stats_result[i];
+ }
+
+ drm_intel_bo_unmap(stats_bo);
+ drm_intel_bo_unreference(stats_bo);
+}
+
#define STATS_CHECK_FREQUENCY 100
#define STATS_REPORT_FREQUENCY 2
@@ -279,6 +470,11 @@ main(int argc, char **argv)
counter_count = GEN6_COUNTER_COUNT;
counter_format = gen6_counter_format;
get_counters = gen6_get_counters;
+ } else if (IS_GEN7(devid)) {
+ counter_name = gen7_counter_names;
+ counter_count = GEN7_COUNTER_COUNT;
+ counter_format = gen7_counter_format;
+ get_counters = gen7_get_counters;
} else {
printf("This tool is not yet supported on your platform.\n");
abort();
@@ -304,6 +500,9 @@ main(int argc, char **argv)
if (l % (STATS_CHECK_FREQUENCY / STATS_REPORT_FREQUENCY) == 0) {
if (have_totals) {
for (i = 0; i < counter_count; i++) {
+ /* Ignore "Reserved" counters */
+ if (!counter_name[i])
+ continue;
printf("%s: %u\n", counter_name[i],
totals[i]);
totals[i] = 0;