summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>2020-02-19 00:01:28 +0200
committerLionel Landwerlin <lionel.g.landwerlin@intel.com>2020-02-19 18:46:04 +0200
commit43116ee368585d4fe37245e7439066a3239ec73e (patch)
treee63392d4b45336bb6d506892d75cefc44d440e91
parentf08865e58cd3f6539e93642940fdfdf2dd2e91f8 (diff)
lib/i915-perf: add i915 perf data reader
This library allows reading a file recorded by i915-perf-recorder. v2: Pick required libdrm dependency Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Chris Wilson <chris@chris-wilson.co.uk> Acked-by: Petri Latvala <petri.latvala@intel.com>
-rw-r--r--lib/Makefile.sources2
-rw-r--r--lib/i915-perf.pc.in1
-rw-r--r--lib/i915/perf_data_reader.c383
-rw-r--r--lib/i915/perf_data_reader.h105
-rw-r--r--lib/meson.build2
5 files changed, 493 insertions, 0 deletions
diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 50bce2a8..b9d8d15d 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -147,6 +147,8 @@ i915_perf_sources = \
i915/perf.c \
i915/perf.h \
i915/perf_data.h \
+ i915/perf_data_reader.c \
+ i915/perf_data_reader.h \
i915/i915_perf_metrics.c \
i915/i915_perf_metrics.h
diff --git a/lib/i915-perf.pc.in b/lib/i915-perf.pc.in
index 0c52f6ee..a4a5afc8 100644
--- a/lib/i915-perf.pc.in
+++ b/lib/i915-perf.pc.in
@@ -6,5 +6,6 @@ includedir=@includedir@
Name: i915-perf
Description: i915 perf library
Version: @i915_perf_version@
+Requires: libdrm >= 2.4.92
Libs: -L${libdir} -li915_perf
Cflags: -I${includedir}/i915-perf
diff --git a/lib/i915/perf_data_reader.c b/lib/i915/perf_data_reader.c
new file mode 100644
index 00000000..3b05a2e5
--- /dev/null
+++ b/lib/i915/perf_data_reader.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <i915_drm.h>
+
+#include "intel_chipset.h"
+#include "perf.h"
+#include "perf_data_reader.h"
+
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static inline bool
+oa_report_ctx_is_valid(const struct intel_perf_devinfo *devinfo,
+ const uint8_t *_report)
+{
+ const uint32_t *report = (const uint32_t *) _report;
+
+ if (devinfo->gen < 8) {
+ return false; /* TODO */
+ } else if (devinfo->gen == 8) {
+ return report[0] & (1ul << 25);
+ } else if (devinfo->gen > 8) {
+ return report[0] & (1ul << 16);
+ }
+
+ return false;
+}
+
+static uint32_t
+oa_report_ctx_id(const struct intel_perf_devinfo *devinfo, const uint8_t *report)
+{
+ if (!oa_report_ctx_is_valid(devinfo, report))
+ return 0xffffffff;
+ return ((const uint32_t *) report)[2];
+}
+
+static inline uint64_t
+oa_report_timestamp(const uint8_t *report)
+{
+ return ((const uint32_t *)report)[1];
+}
+
+static void
+append_record(struct intel_perf_data_reader *reader,
+ const struct drm_i915_perf_record_header *header)
+{
+ if (reader->n_records >= reader->n_allocated_records) {
+ reader->n_allocated_records = MAX(100, 2 * reader->n_allocated_records);
+ reader->records =
+ (const struct drm_i915_perf_record_header **)
+ realloc((void *) reader->records,
+ reader->n_allocated_records *
+ sizeof(struct drm_i915_perf_record_header *));
+ assert(reader->records);
+ }
+
+ reader->records[reader->n_records++] = header;
+}
+
+static void
+append_timestamp_correlation(struct intel_perf_data_reader *reader,
+ const struct intel_perf_record_timestamp_correlation *corr)
+{
+ if (reader->n_correlations >= reader->n_allocated_correlations) {
+ reader->n_allocated_correlations = MAX(100, 2 * reader->n_allocated_correlations);
+ reader->correlations =
+ (const struct intel_perf_record_timestamp_correlation **)
+ realloc((void *) reader->correlations,
+ reader->n_allocated_correlations *
+ sizeof(*reader->correlations));
+ assert(reader->correlations);
+ }
+
+ reader->correlations[reader->n_correlations++] = corr;
+}
+
+static struct intel_perf_metric_set *
+find_metric_set(struct intel_perf *perf, const char *symbol_name)
+{
+ struct intel_perf_metric_set *metric_set;
+
+ igt_list_for_each_entry(metric_set, &perf->metric_sets, link) {
+ if (!strcmp(symbol_name, metric_set->symbol_name))
+ return metric_set;
+ }
+
+ return NULL;
+}
+
+static void
+init_devinfo(struct intel_perf_devinfo *perf_devinfo,
+ const struct intel_device_info *devinfo,
+ uint32_t devid,
+ uint64_t timestamp_frequency)
+{
+ perf_devinfo->devid = devid;
+ perf_devinfo->gen = devinfo->gen;
+ perf_devinfo->timestamp_frequency = timestamp_frequency;
+}
+
+static bool
+parse_data(struct intel_perf_data_reader *reader)
+{
+ const struct intel_perf_record_device_info *record_info;
+ const struct intel_perf_record_device_topology *record_topology;
+ const struct intel_device_info *devinfo;
+ const uint8_t *end = reader->mmap_data + reader->mmap_size;
+ const uint8_t *iter = reader->mmap_data;
+
+ while (iter < end) {
+ const struct drm_i915_perf_record_header *header =
+ (const struct drm_i915_perf_record_header *) iter;
+
+ switch (header->type) {
+ case DRM_I915_PERF_RECORD_SAMPLE:
+ append_record(reader, header);
+ break;
+
+ case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
+ case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
+ assert(header->size == sizeof(*header));
+ break;
+
+ case INTEL_PERF_RECORD_TYPE_VERSION: {
+ struct intel_perf_record_version *version =
+ (struct intel_perf_record_version*) (header + 1);
+ if (version->version != INTEL_PERF_RECORD_VERSION) {
+ snprintf(reader->error_msg, sizeof(reader->error_msg),
+ "Unsupported recording version (%u, expected %u)",
+ version->version, INTEL_PERF_RECORD_VERSION);
+ return false;
+ }
+ break;
+ }
+
+ case INTEL_PERF_RECORD_TYPE_DEVICE_INFO: {
+ reader->record_info = header + 1;
+ assert(header->size == (sizeof(struct intel_perf_record_device_info) +
+ sizeof(*header)));
+ break;
+ }
+
+ case INTEL_PERF_RECORD_TYPE_DEVICE_TOPOLOGY: {
+ reader->record_topology = header + 1;
+ break;
+ }
+
+ case INTEL_PERF_RECORD_TYPE_TIMESTAMP_CORRELATION: {
+ append_timestamp_correlation(reader,
+ (const struct intel_perf_record_timestamp_correlation *) (header + 1));
+ break;
+ }
+ }
+
+ iter += header->size;
+ }
+
+ if (!reader->record_info ||
+ !reader->record_topology) {
+ snprintf(reader->error_msg, sizeof(reader->error_msg),
+ "Invalid file, missing device or topology info");
+ return false;
+ }
+
+ record_info = reader->record_info;
+ record_topology = reader->record_topology;
+
+ devinfo = intel_get_device_info(record_info->device_id);
+ if (!devinfo) {
+ snprintf(reader->error_msg, sizeof(reader->error_msg),
+ "Recording occured on unsupported device (0x%x)",
+ record_info->device_id);
+ return false;
+ }
+
+ init_devinfo(&reader->devinfo, devinfo,
+ record_info->device_id,
+ record_info->timestamp_frequency);
+ reader->perf = intel_perf_for_devinfo(record_info->device_id,
+ record_info->device_revision,
+ record_info->timestamp_frequency,
+ record_info->gt_min_frequency,
+ record_info->gt_max_frequency,
+ &record_topology->topology);
+
+ reader->metric_set_name = record_info->metric_set_name;
+ reader->metric_set_uuid = record_info->metric_set_uuid;
+ reader->metric_set = find_metric_set(reader->perf, record_info->metric_set_name);
+
+ return true;
+}
+
+static uint64_t
+correlate_gpu_timestamp(struct intel_perf_data_reader *reader,
+ uint64_t gpu_ts)
+{
+ /* OA reports only have the lower 32bits of the timestamp
+ * register, while our correlation data has the whole 36bits.
+ * Try to figure what portion of the correlation data the
+ * 32bit timestamp belongs to.
+ */
+ uint64_t mask = 0xffffffff;
+ int corr_idx = -1;
+
+ for (uint32_t i = 0; i < reader->n_correlation_chunks; i++) {
+ if (gpu_ts >= (reader->correlation_chunks[i].gpu_ts_begin & mask) &&
+ gpu_ts <= (reader->correlation_chunks[i].gpu_ts_end & mask)) {
+ corr_idx = reader->correlation_chunks[i].idx;
+ break;
+ }
+ }
+
+ /* Not found? Assume prior to the first timestamp correlation.
+ */
+ if (corr_idx < 0) {
+ return reader->correlations[0]->cpu_timestamp -
+ ((reader->correlations[0]->gpu_timestamp & mask) - gpu_ts) *
+ (reader->correlations[1]->cpu_timestamp - reader->correlations[0]->cpu_timestamp) /
+ (reader->correlations[1]->gpu_timestamp - reader->correlations[0]->gpu_timestamp);
+ }
+
+ for (uint32_t i = corr_idx; i < (reader->n_correlations - 1); i++) {
+ if (gpu_ts >= (reader->correlations[i]->gpu_timestamp & mask) &&
+ gpu_ts < (reader->correlations[i + 1]->gpu_timestamp & mask)) {
+ return reader->correlations[i]->cpu_timestamp +
+ (gpu_ts - (reader->correlations[i]->gpu_timestamp & mask)) *
+ (reader->correlations[i + 1]->cpu_timestamp - reader->correlations[i]->cpu_timestamp) /
+ (reader->correlations[i + 1]->gpu_timestamp - reader->correlations[i]->gpu_timestamp);
+ }
+ }
+
+ /* This is a bit harsh, but the recording tool should ensure we have
+ * sampling points on either side of the bag of OA reports.
+ */
+ assert(0);
+}
+
+static void
+append_timeline_event(struct intel_perf_data_reader *reader,
+ uint64_t ts_start, uint64_t ts_end,
+ uint32_t record_start, uint32_t record_end,
+ uint32_t hw_id)
+{
+ if (reader->n_timelines >= reader->n_allocated_timelines) {
+ reader->n_allocated_timelines = MAX(100, 2 * reader->n_allocated_timelines);
+ reader->timelines =
+ (struct intel_perf_timeline_item *)
+ realloc((void *) reader->timelines,
+ reader->n_allocated_timelines *
+ sizeof(*reader->timelines));
+ assert(reader->timelines);
+ }
+
+ reader->timelines[reader->n_timelines].ts_start = ts_start;
+ reader->timelines[reader->n_timelines].ts_end = ts_end;
+ reader->timelines[reader->n_timelines].cpu_ts_start =
+ correlate_gpu_timestamp(reader, ts_start);
+ reader->timelines[reader->n_timelines].cpu_ts_end =
+ correlate_gpu_timestamp(reader, ts_end);
+ reader->timelines[reader->n_timelines].record_start = record_start;
+ reader->timelines[reader->n_timelines].record_end = record_end;
+ reader->timelines[reader->n_timelines].hw_id = hw_id;
+ reader->n_timelines++;
+}
+
+static void
+generate_cpu_events(struct intel_perf_data_reader *reader)
+{
+ uint32_t last_header_idx = 0;
+ const struct drm_i915_perf_record_header *last_header = reader->records[0];
+
+ for (uint32_t i = 1; i < reader->n_records; i++) {
+ const struct drm_i915_perf_record_header *current_header =
+ reader->records[i];
+ const uint8_t *start_report = (const uint8_t *) (last_header + 1),
+ *end_report = (const uint8_t *) (current_header + 1);
+ uint32_t last_ctx_id = oa_report_ctx_id(&reader->devinfo, start_report),
+ current_ctx_id = oa_report_ctx_id(&reader->devinfo, end_report);
+ uint64_t gpu_ts_start = oa_report_timestamp(start_report),
+ gpu_ts_end = oa_report_timestamp(end_report);
+
+ if (last_ctx_id == current_ctx_id)
+ continue;
+
+ append_timeline_event(reader, gpu_ts_start, gpu_ts_end, last_header_idx, i, last_ctx_id);
+
+ last_header = current_header;
+ last_header_idx = i;
+ }
+}
+
+static void
+compute_correlation_chunks(struct intel_perf_data_reader *reader)
+{
+ uint64_t mask = ~(0xffffffff);
+ uint32_t last_idx = 0;
+ uint64_t last_ts = reader->correlations[last_idx]->gpu_timestamp;
+
+ for (uint32_t i = 0; i < reader->n_correlations; i++) {
+ if (!reader->n_correlation_chunks ||
+ (last_ts & mask) != (reader->correlations[i]->gpu_timestamp & mask)) {
+ assert(reader->n_correlation_chunks < ARRAY_SIZE(reader->correlation_chunks));
+ reader->correlation_chunks[reader->n_correlation_chunks].gpu_ts_begin = last_ts;
+ reader->correlation_chunks[reader->n_correlation_chunks].gpu_ts_end = last_ts | ~mask;
+ reader->correlation_chunks[reader->n_correlation_chunks].idx = last_idx;
+ last_ts = reader->correlation_chunks[reader->n_correlation_chunks].gpu_ts_end + 1;
+ last_idx = i;
+ reader->n_correlation_chunks++;
+ }
+ }
+}
+
+bool
+intel_perf_data_reader_init(struct intel_perf_data_reader *reader,
+ int perf_file_fd)
+{
+ struct stat st;
+ if (fstat(perf_file_fd, &st) != 0) {
+ snprintf(reader->error_msg, sizeof(reader->error_msg),
+ "Unable to access file (%s)", strerror(errno));
+ return false;
+ }
+
+ memset(reader, 0, sizeof(*reader));
+
+ reader->mmap_size = st.st_size;
+ reader->mmap_data = (const uint8_t *) mmap(NULL, st.st_size,
+ PROT_READ, MAP_PRIVATE,
+ perf_file_fd, 0);
+ if (reader->mmap_data == MAP_FAILED) {
+ snprintf(reader->error_msg, sizeof(reader->error_msg),
+ "Unable to access file (%s)", strerror(errno));
+ return false;
+ }
+
+ if (!parse_data(reader))
+ return false;
+
+ compute_correlation_chunks(reader);
+ generate_cpu_events(reader);
+
+ return true;
+}
+
+void
+intel_perf_data_reader_fini(struct intel_perf_data_reader *reader)
+{
+ intel_perf_free(reader->perf);
+ free(reader->records);
+ free(reader->timelines);
+ free(reader->correlations);
+ munmap((void *)reader->mmap_data, reader->mmap_size);
+}
diff --git a/lib/i915/perf_data_reader.h b/lib/i915/perf_data_reader.h
new file mode 100644
index 00000000..f625f12d
--- /dev/null
+++ b/lib/i915/perf_data_reader.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PERF_DATA_READER_H
+#define PERF_DATA_READER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Helper to read a i915-perf recording. */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "perf.h"
+#include "perf_data.h"
+
+struct intel_perf_timeline_item {
+ uint64_t ts_start;
+ uint64_t ts_end;
+ uint64_t cpu_ts_start;
+ uint64_t cpu_ts_end;
+
+ /* Offsets into intel_perf_data_reader.records */
+ uint32_t record_start;
+ uint32_t record_end;
+
+ uint32_t hw_id;
+
+ /* User associated data with a given item on the i915 perf
+ * timeline.
+ */
+ void *user_data;
+};
+
+struct intel_perf_data_reader {
+ /* Array of pointers into the mmapped i915 perf file. */
+ const struct drm_i915_perf_record_header **records;
+ uint32_t n_records;
+ uint32_t n_allocated_records;
+
+ /**/
+ struct intel_perf_timeline_item *timelines;
+ uint32_t n_timelines;
+ uint32_t n_allocated_timelines;
+
+ /**/
+ const struct intel_perf_record_timestamp_correlation **correlations;
+ uint32_t n_correlations;
+ uint32_t n_allocated_correlations;
+
+ struct {
+ uint64_t gpu_ts_begin;
+ uint64_t gpu_ts_end;
+ uint32_t idx;
+ } correlation_chunks[4];
+ uint32_t n_correlation_chunks;
+
+ const char *metric_set_uuid;
+ const char *metric_set_name;
+
+ struct intel_perf_devinfo devinfo;
+
+ struct intel_perf *perf;
+ struct intel_perf_metric_set *metric_set;
+
+ char error_msg[256];
+
+ /**/
+ const void *record_info;
+ const void *record_topology;
+
+ const uint8_t *mmap_data;
+ size_t mmap_size;
+};
+
+bool intel_perf_data_reader_init(struct intel_perf_data_reader *reader,
+ int perf_file_fd);
+void intel_perf_data_reader_fini(struct intel_perf_data_reader *reader);
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif /* PERF_DATA_READER_H */
diff --git a/lib/meson.build b/lib/meson.build
index e396a8a6..8112bec4 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -174,6 +174,7 @@ lib_igt_perf = declare_dependency(link_with : lib_igt_perf_build,
i915_perf_files = [
'igt_list.c',
'i915/perf.c',
+ 'i915/perf_data_reader.c',
]
i915_perf_hardware = [
@@ -221,6 +222,7 @@ install_headers(
'intel_chipset.h',
'i915/perf.h',
'i915/perf_data.h',
+ 'i915/perf_data_reader.h',
subdir : 'i915-perf'
)