diff options
author | Lionel Landwerlin <lionel.g.landwerlin@intel.com> | 2020-02-19 00:01:28 +0200 |
---|---|---|
committer | Lionel Landwerlin <lionel.g.landwerlin@intel.com> | 2020-02-19 18:46:04 +0200 |
commit | 43116ee368585d4fe37245e7439066a3239ec73e (patch) | |
tree | e63392d4b45336bb6d506892d75cefc44d440e91 | |
parent | f08865e58cd3f6539e93642940fdfdf2dd2e91f8 (diff) |
lib/i915-perf: add i915 perf data reader
This library allows reading a file recorded by i915-perf-recorder.
v2: Declare the required libdrm dependency (>= 2.4.92) in i915-perf.pc
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Petri Latvala <petri.latvala@intel.com>
-rw-r--r-- | lib/Makefile.sources | 2 | ||||
-rw-r--r-- | lib/i915-perf.pc.in | 1 | ||||
-rw-r--r-- | lib/i915/perf_data_reader.c | 383 | ||||
-rw-r--r-- | lib/i915/perf_data_reader.h | 105 | ||||
-rw-r--r-- | lib/meson.build | 2 |
5 files changed, 493 insertions, 0 deletions
diff --git a/lib/Makefile.sources b/lib/Makefile.sources index 50bce2a8..b9d8d15d 100644 --- a/lib/Makefile.sources +++ b/lib/Makefile.sources @@ -147,6 +147,8 @@ i915_perf_sources = \ i915/perf.c \ i915/perf.h \ i915/perf_data.h \ + i915/perf_data_reader.c \ + i915/perf_data_reader.h \ i915/i915_perf_metrics.c \ i915/i915_perf_metrics.h diff --git a/lib/i915-perf.pc.in b/lib/i915-perf.pc.in index 0c52f6ee..a4a5afc8 100644 --- a/lib/i915-perf.pc.in +++ b/lib/i915-perf.pc.in @@ -6,5 +6,6 @@ includedir=@includedir@ Name: i915-perf Description: i915 perf library Version: @i915_perf_version@ +Requires: libdrm >= 2.4.92 Libs: -L${libdir} -li915_perf Cflags: -I${includedir}/i915-perf diff --git a/lib/i915/perf_data_reader.c b/lib/i915/perf_data_reader.c new file mode 100644 index 00000000..3b05a2e5 --- /dev/null +++ b/lib/i915/perf_data_reader.c @@ -0,0 +1,383 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <i915_drm.h> + +#include "intel_chipset.h" +#include "perf.h" +#include "perf_data_reader.h" + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static inline bool +oa_report_ctx_is_valid(const struct intel_perf_devinfo *devinfo, + const uint8_t *_report) +{ + const uint32_t *report = (const uint32_t *) _report; + + if (devinfo->gen < 8) { + return false; /* TODO */ + } else if (devinfo->gen == 8) { + return report[0] & (1ul << 25); + } else if (devinfo->gen > 8) { + return report[0] & (1ul << 16); + } + + return false; +} + +static uint32_t +oa_report_ctx_id(const struct intel_perf_devinfo *devinfo, const uint8_t *report) +{ + if (!oa_report_ctx_is_valid(devinfo, report)) + return 0xffffffff; + return ((const uint32_t *) report)[2]; +} + +static inline uint64_t +oa_report_timestamp(const uint8_t *report) +{ + return ((const uint32_t *)report)[1]; +} + +static void +append_record(struct intel_perf_data_reader *reader, + const struct drm_i915_perf_record_header *header) +{ + if (reader->n_records >= reader->n_allocated_records) { + reader->n_allocated_records = MAX(100, 2 * reader->n_allocated_records); + reader->records = + (const struct drm_i915_perf_record_header **) + realloc((void *) reader->records, + reader->n_allocated_records * + sizeof(struct drm_i915_perf_record_header *)); + assert(reader->records); + } + + reader->records[reader->n_records++] = header; +} + +static void +append_timestamp_correlation(struct 
intel_perf_data_reader *reader, + const struct intel_perf_record_timestamp_correlation *corr) +{ + if (reader->n_correlations >= reader->n_allocated_correlations) { + reader->n_allocated_correlations = MAX(100, 2 * reader->n_allocated_correlations); + reader->correlations = + (const struct intel_perf_record_timestamp_correlation **) + realloc((void *) reader->correlations, + reader->n_allocated_correlations * + sizeof(*reader->correlations)); + assert(reader->correlations); + } + + reader->correlations[reader->n_correlations++] = corr; +} + +static struct intel_perf_metric_set * +find_metric_set(struct intel_perf *perf, const char *symbol_name) +{ + struct intel_perf_metric_set *metric_set; + + igt_list_for_each_entry(metric_set, &perf->metric_sets, link) { + if (!strcmp(symbol_name, metric_set->symbol_name)) + return metric_set; + } + + return NULL; +} + +static void +init_devinfo(struct intel_perf_devinfo *perf_devinfo, + const struct intel_device_info *devinfo, + uint32_t devid, + uint64_t timestamp_frequency) +{ + perf_devinfo->devid = devid; + perf_devinfo->gen = devinfo->gen; + perf_devinfo->timestamp_frequency = timestamp_frequency; +} + +static bool +parse_data(struct intel_perf_data_reader *reader) +{ + const struct intel_perf_record_device_info *record_info; + const struct intel_perf_record_device_topology *record_topology; + const struct intel_device_info *devinfo; + const uint8_t *end = reader->mmap_data + reader->mmap_size; + const uint8_t *iter = reader->mmap_data; + + while (iter < end) { + const struct drm_i915_perf_record_header *header = + (const struct drm_i915_perf_record_header *) iter; + + switch (header->type) { + case DRM_I915_PERF_RECORD_SAMPLE: + append_record(reader, header); + break; + + case DRM_I915_PERF_RECORD_OA_REPORT_LOST: + case DRM_I915_PERF_RECORD_OA_BUFFER_LOST: + assert(header->size == sizeof(*header)); + break; + + case INTEL_PERF_RECORD_TYPE_VERSION: { + struct intel_perf_record_version *version = + (struct 
intel_perf_record_version*) (header + 1); + if (version->version != INTEL_PERF_RECORD_VERSION) { + snprintf(reader->error_msg, sizeof(reader->error_msg), + "Unsupported recording version (%u, expected %u)", + version->version, INTEL_PERF_RECORD_VERSION); + return false; + } + break; + } + + case INTEL_PERF_RECORD_TYPE_DEVICE_INFO: { + reader->record_info = header + 1; + assert(header->size == (sizeof(struct intel_perf_record_device_info) + + sizeof(*header))); + break; + } + + case INTEL_PERF_RECORD_TYPE_DEVICE_TOPOLOGY: { + reader->record_topology = header + 1; + break; + } + + case INTEL_PERF_RECORD_TYPE_TIMESTAMP_CORRELATION: { + append_timestamp_correlation(reader, + (const struct intel_perf_record_timestamp_correlation *) (header + 1)); + break; + } + } + + iter += header->size; + } + + if (!reader->record_info || + !reader->record_topology) { + snprintf(reader->error_msg, sizeof(reader->error_msg), + "Invalid file, missing device or topology info"); + return false; + } + + record_info = reader->record_info; + record_topology = reader->record_topology; + + devinfo = intel_get_device_info(record_info->device_id); + if (!devinfo) { + snprintf(reader->error_msg, sizeof(reader->error_msg), + "Recording occured on unsupported device (0x%x)", + record_info->device_id); + return false; + } + + init_devinfo(&reader->devinfo, devinfo, + record_info->device_id, + record_info->timestamp_frequency); + reader->perf = intel_perf_for_devinfo(record_info->device_id, + record_info->device_revision, + record_info->timestamp_frequency, + record_info->gt_min_frequency, + record_info->gt_max_frequency, + &record_topology->topology); + + reader->metric_set_name = record_info->metric_set_name; + reader->metric_set_uuid = record_info->metric_set_uuid; + reader->metric_set = find_metric_set(reader->perf, record_info->metric_set_name); + + return true; +} + +static uint64_t +correlate_gpu_timestamp(struct intel_perf_data_reader *reader, + uint64_t gpu_ts) +{ + /* OA reports only have 
the lower 32bits of the timestamp + * register, while our correlation data has the whole 36bits. + * Try to figure what portion of the correlation data the + * 32bit timestamp belongs to. + */ + uint64_t mask = 0xffffffff; + int corr_idx = -1; + + for (uint32_t i = 0; i < reader->n_correlation_chunks; i++) { + if (gpu_ts >= (reader->correlation_chunks[i].gpu_ts_begin & mask) && + gpu_ts <= (reader->correlation_chunks[i].gpu_ts_end & mask)) { + corr_idx = reader->correlation_chunks[i].idx; + break; + } + } + + /* Not found? Assume prior to the first timestamp correlation. + */ + if (corr_idx < 0) { + return reader->correlations[0]->cpu_timestamp - + ((reader->correlations[0]->gpu_timestamp & mask) - gpu_ts) * + (reader->correlations[1]->cpu_timestamp - reader->correlations[0]->cpu_timestamp) / + (reader->correlations[1]->gpu_timestamp - reader->correlations[0]->gpu_timestamp); + } + + for (uint32_t i = corr_idx; i < (reader->n_correlations - 1); i++) { + if (gpu_ts >= (reader->correlations[i]->gpu_timestamp & mask) && + gpu_ts < (reader->correlations[i + 1]->gpu_timestamp & mask)) { + return reader->correlations[i]->cpu_timestamp + + (gpu_ts - (reader->correlations[i]->gpu_timestamp & mask)) * + (reader->correlations[i + 1]->cpu_timestamp - reader->correlations[i]->cpu_timestamp) / + (reader->correlations[i + 1]->gpu_timestamp - reader->correlations[i]->gpu_timestamp); + } + } + + /* This is a bit harsh, but the recording tool should ensure we have + * sampling points on either side of the bag of OA reports. 
+ */ + assert(0); +} + +static void +append_timeline_event(struct intel_perf_data_reader *reader, + uint64_t ts_start, uint64_t ts_end, + uint32_t record_start, uint32_t record_end, + uint32_t hw_id) +{ + if (reader->n_timelines >= reader->n_allocated_timelines) { + reader->n_allocated_timelines = MAX(100, 2 * reader->n_allocated_timelines); + reader->timelines = + (struct intel_perf_timeline_item *) + realloc((void *) reader->timelines, + reader->n_allocated_timelines * + sizeof(*reader->timelines)); + assert(reader->timelines); + } + + reader->timelines[reader->n_timelines].ts_start = ts_start; + reader->timelines[reader->n_timelines].ts_end = ts_end; + reader->timelines[reader->n_timelines].cpu_ts_start = + correlate_gpu_timestamp(reader, ts_start); + reader->timelines[reader->n_timelines].cpu_ts_end = + correlate_gpu_timestamp(reader, ts_end); + reader->timelines[reader->n_timelines].record_start = record_start; + reader->timelines[reader->n_timelines].record_end = record_end; + reader->timelines[reader->n_timelines].hw_id = hw_id; + reader->n_timelines++; +} + +static void +generate_cpu_events(struct intel_perf_data_reader *reader) +{ + uint32_t last_header_idx = 0; + const struct drm_i915_perf_record_header *last_header = reader->records[0]; + + for (uint32_t i = 1; i < reader->n_records; i++) { + const struct drm_i915_perf_record_header *current_header = + reader->records[i]; + const uint8_t *start_report = (const uint8_t *) (last_header + 1), + *end_report = (const uint8_t *) (current_header + 1); + uint32_t last_ctx_id = oa_report_ctx_id(&reader->devinfo, start_report), + current_ctx_id = oa_report_ctx_id(&reader->devinfo, end_report); + uint64_t gpu_ts_start = oa_report_timestamp(start_report), + gpu_ts_end = oa_report_timestamp(end_report); + + if (last_ctx_id == current_ctx_id) + continue; + + append_timeline_event(reader, gpu_ts_start, gpu_ts_end, last_header_idx, i, last_ctx_id); + + last_header = current_header; + last_header_idx = i; + } +} + 
+static void +compute_correlation_chunks(struct intel_perf_data_reader *reader) +{ + uint64_t mask = ~(0xffffffff); + uint32_t last_idx = 0; + uint64_t last_ts = reader->correlations[last_idx]->gpu_timestamp; + + for (uint32_t i = 0; i < reader->n_correlations; i++) { + if (!reader->n_correlation_chunks || + (last_ts & mask) != (reader->correlations[i]->gpu_timestamp & mask)) { + assert(reader->n_correlation_chunks < ARRAY_SIZE(reader->correlation_chunks)); + reader->correlation_chunks[reader->n_correlation_chunks].gpu_ts_begin = last_ts; + reader->correlation_chunks[reader->n_correlation_chunks].gpu_ts_end = last_ts | ~mask; + reader->correlation_chunks[reader->n_correlation_chunks].idx = last_idx; + last_ts = reader->correlation_chunks[reader->n_correlation_chunks].gpu_ts_end + 1; + last_idx = i; + reader->n_correlation_chunks++; + } + } +} + +bool +intel_perf_data_reader_init(struct intel_perf_data_reader *reader, + int perf_file_fd) +{ + struct stat st; + if (fstat(perf_file_fd, &st) != 0) { + snprintf(reader->error_msg, sizeof(reader->error_msg), + "Unable to access file (%s)", strerror(errno)); + return false; + } + + memset(reader, 0, sizeof(*reader)); + + reader->mmap_size = st.st_size; + reader->mmap_data = (const uint8_t *) mmap(NULL, st.st_size, + PROT_READ, MAP_PRIVATE, + perf_file_fd, 0); + if (reader->mmap_data == MAP_FAILED) { + snprintf(reader->error_msg, sizeof(reader->error_msg), + "Unable to access file (%s)", strerror(errno)); + return false; + } + + if (!parse_data(reader)) + return false; + + compute_correlation_chunks(reader); + generate_cpu_events(reader); + + return true; +} + +void +intel_perf_data_reader_fini(struct intel_perf_data_reader *reader) +{ + intel_perf_free(reader->perf); + free(reader->records); + free(reader->timelines); + free(reader->correlations); + munmap((void *)reader->mmap_data, reader->mmap_size); +} diff --git a/lib/i915/perf_data_reader.h b/lib/i915/perf_data_reader.h new file mode 100644 index 00000000..f625f12d 
--- /dev/null +++ b/lib/i915/perf_data_reader.h @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef PERF_DATA_READER_H +#define PERF_DATA_READER_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Helper to read a i915-perf recording. */ + +#include <stdbool.h> +#include <stdint.h> + +#include "perf.h" +#include "perf_data.h" + +struct intel_perf_timeline_item { + uint64_t ts_start; + uint64_t ts_end; + uint64_t cpu_ts_start; + uint64_t cpu_ts_end; + + /* Offsets into intel_perf_data_reader.records */ + uint32_t record_start; + uint32_t record_end; + + uint32_t hw_id; + + /* User associated data with a given item on the i915 perf + * timeline. + */ + void *user_data; +}; + +struct intel_perf_data_reader { + /* Array of pointers into the mmapped i915 perf file. 
*/ + const struct drm_i915_perf_record_header **records; + uint32_t n_records; + uint32_t n_allocated_records; + + /**/ + struct intel_perf_timeline_item *timelines; + uint32_t n_timelines; + uint32_t n_allocated_timelines; + + /**/ + const struct intel_perf_record_timestamp_correlation **correlations; + uint32_t n_correlations; + uint32_t n_allocated_correlations; + + struct { + uint64_t gpu_ts_begin; + uint64_t gpu_ts_end; + uint32_t idx; + } correlation_chunks[4]; + uint32_t n_correlation_chunks; + + const char *metric_set_uuid; + const char *metric_set_name; + + struct intel_perf_devinfo devinfo; + + struct intel_perf *perf; + struct intel_perf_metric_set *metric_set; + + char error_msg[256]; + + /**/ + const void *record_info; + const void *record_topology; + + const uint8_t *mmap_data; + size_t mmap_size; +}; + +bool intel_perf_data_reader_init(struct intel_perf_data_reader *reader, + int perf_file_fd); +void intel_perf_data_reader_fini(struct intel_perf_data_reader *reader); + +#ifdef __cplusplus +}; +#endif + +#endif /* PERF_DATA_READER_H */ diff --git a/lib/meson.build b/lib/meson.build index e396a8a6..8112bec4 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -174,6 +174,7 @@ lib_igt_perf = declare_dependency(link_with : lib_igt_perf_build, i915_perf_files = [ 'igt_list.c', 'i915/perf.c', + 'i915/perf_data_reader.c', ] i915_perf_hardware = [ @@ -221,6 +222,7 @@ install_headers( 'intel_chipset.h', 'i915/perf.h', 'i915/perf_data.h', + 'i915/perf_data_reader.h', subdir : 'i915-perf' ) |