summaryrefslogtreecommitdiff
path: root/scripts/trace.pl
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/trace.pl')
-rwxr-xr-xscripts/trace.pl990
1 files changed, 990 insertions, 0 deletions
diff --git a/scripts/trace.pl b/scripts/trace.pl
new file mode 100755
index 00000000..1f524aaa
--- /dev/null
+++ b/scripts/trace.pl
@@ -0,0 +1,990 @@
+#! /usr/bin/perl
+#
+# Copyright © 2017 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+
+use strict;
+use warnings;
+use 5.010;
+
+my $gid = 0;
+my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait);
+my @freqs;
+
+my $max_items = 3000;
+my $width_us = 32000;
+my $correct_durations = 0;
+my %ignore_ring;
+my %skip_box;
+my $html = 0;
+my $trace = 0;
+my $avg_delay_stats = 0;
+my $squash_context_id = 0;
+
+my @args;
+
+sub arg_help
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--help' or $_[0] eq '-h') {
+ shift @_;
+print <<ENDHELP;
+Notes:
+
+ The tool parse the output generated by the 'perf script' command after the
+ correct set of i915 tracepoints have been collected via perf record.
+
+ To collect the data:
+
+ ./trace.pl --trace [command-to-be-profiled]
+
+ The above will invoke perf record, or alternatively it can be done directly:
+
+ perf record -a -c 1 -e i915:intel_gpu_freq_change, \
+ i915:i915_gem_request_add, \
+ i915:i915_gem_request_submit, \
+ i915:i915_gem_request_in, \
+ i915:i915_gem_request_out, \
+ i915:intel_engine_notify, \
+ i915:i915_gem_request_wait_begin, \
+ i915:i915_gem_request_wait_end \
+ [command-to-be-profiled]
+
+ Then create the log file with:
+
+ perf script >trace.log
+
+ This file in turn should be piped into this tool which will generate some
+ statistics out of it, or if --html was given HTML output.
+
+ HTML can be viewed from a directory containing the 'vis' JavaScript module.
+ On Ubuntu this can be installed like this:
+
+ apt-get install npm
+ npm install vis
+
+Usage:
+ trace.pl <options> <input-file >output-file
+
+ --help / -h This help text
+ --max-items=num / -m num Maximum number of boxes to put on the
+ timeline. More boxes means more work for
+ the JavaScript engine in the browser.
+ --zoom-width-ms=ms / -z ms Width of the initial timeline zoom
+ --split-requests / -s Try to split out request which were
+ submitted together due coalescing in the
+ driver. May not be 100% accurate and may
+ influence the per-engine statistics so
+ use with care.
+ --ignore-ring=id / -i id Ignore ring with the numerical id when
+ parsing the log (enum intel_engine_id).
+ Can be given multiple times.
+ --skip-box=name / -x name Do not put a certain type of a box on
+ the timeline. One of: queue, ready,
+ execute and ctxsave.
+ Can be given multiple times.
+ --html Generate HTML output.
+ --trace cmd Trace the following command.
+ --avg-delay-stats Print average delay stats.
+ --squash-ctx-id Squash context id by substracting engine
+ id from ctx id.
+ENDHELP
+
+ exit 0;
+ }
+
+ return @_;
+}
+
+sub arg_html
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--html') {
+ shift @_;
+ $html = 1;
+ }
+
+ return @_;
+}
+
+sub arg_avg_delay_stats
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--avg-delay-stats') {
+ shift @_;
+ $avg_delay_stats = 1;
+ }
+
+ return @_;
+}
+
+sub arg_squash_ctx_id
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--squash-ctx-id') {
+ shift @_;
+ $squash_context_id = 1;
+ }
+
+ return @_;
+}
+
+sub arg_trace
+{
+ my @events = ( 'i915:intel_gpu_freq_change',
+ 'i915:i915_gem_request_add',
+ 'i915:i915_gem_request_submit',
+ 'i915:i915_gem_request_in',
+ 'i915:i915_gem_request_out',
+ 'i915:intel_engine_notify',
+ 'i915:i915_gem_request_wait_begin',
+ 'i915:i915_gem_request_wait_end' );
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--trace') {
+ shift @_;
+
+ unshift @_, join(',', @events);
+ unshift @_, ('perf', 'record', '-a', '-c', '1', '-e');
+
+ exec @_;
+ }
+
+ return @_;
+}
+
+sub arg_max_items
+{
+ my $val;
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--max-items' or $_[0] eq '-m') {
+ shift @_;
+ $val = shift @_;
+ } elsif ($_[0] =~ /--max-items=(\d+)/) {
+ shift @_;
+ $val = $1;
+ }
+
+ $max_items = int($val) if defined $val;
+
+ return @_;
+}
+
+sub arg_zoom_width
+{
+ my $val;
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--zoom-width-ms' or $_[0] eq '-z') {
+ shift @_;
+ $val = shift @_;
+ } elsif ($_[0] =~ /--zoom-width-ms=(\d+)/) {
+ shift @_;
+ $val = $1;
+ }
+
+ $width_us = int($val) * 1000 if defined $val;
+
+ return @_;
+}
+
+sub arg_split_requests
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--split-requests' or $_[0] eq '-s') {
+ shift @_;
+ $correct_durations = 1;
+ }
+
+ return @_;
+}
+
+sub arg_ignore_ring
+{
+ my $val;
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--ignore-ring' or $_[0] eq '-i') {
+ shift @_;
+ $val = shift @_;
+ } elsif ($_[0] =~ /--ignore-ring=(\d+)/) {
+ shift @_;
+ $val = $1;
+ }
+
+ $ignore_ring{$val} = 1 if defined $val;
+
+ return @_;
+}
+
+sub arg_skip_box
+{
+ my $val;
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--skip-box' or $_[0] eq '-x') {
+ shift @_;
+ $val = shift @_;
+ } elsif ($_[0] =~ /--skip-box=(\d+)/) {
+ shift @_;
+ $val = $1;
+ }
+
+ $skip_box{$val} = 1 if defined $val;
+
+ return @_;
+}
+
+@args = @ARGV;
+while (@args) {
+ my $left = scalar(@args);
+
+ @args = arg_help(@args);
+ @args = arg_html(@args);
+ @args = arg_avg_delay_stats(@args);
+ @args = arg_squash_ctx_id(@args);
+ @args = arg_trace(@args);
+ @args = arg_max_items(@args);
+ @args = arg_zoom_width(@args);
+ @args = arg_split_requests(@args);
+ @args = arg_ignore_ring(@args);
+ @args = arg_skip_box(@args);
+
+ last if $left == scalar(@args);
+}
+
+die if scalar(@args);
+
+@ARGV = @args;
+
+sub parse_req
+{
+ my ($line, $tp) = @_;
+ state %cache;
+
+ $cache{$tp} = qr/(\d+)\.(\d+):.*$tp.*ring=(\d+), ctx=(\d+), seqno=(\d+), global(?:_seqno)?=(\d+)/ unless exists $cache{$tp};
+
+ if ($line =~ $cache{$tp}) {
+ return ($1, $2, $3, $4, $5, $6);
+ } else {
+ return undef;
+ }
+}
+
+sub parse_req_hw
+{
+ my ($line, $tp) = @_;
+ state %cache;
+
+ $cache{$tp} = qr/(\d+)\.(\d+):.*$tp.*ring=(\d+), ctx=(\d+), seqno=(\d+), global(?:_seqno)?=(\d+), port=(\d+)/ unless exists $cache{$tp};
+
+ if ($line =~ $cache{$tp}) {
+ return ($1, $2, $3, $4, $5, $6, $7);
+ } else {
+ return undef;
+ }
+}
+
+sub parse_req_wait_begin
+{
+ my ($line, $tp) = @_;
+
+ if ($line =~ /(\d+)\.(\d+):.*i915_gem_request_wait_begin.*ring=(\d+), ctx=(\d+), seqno=(\d+)/) {
+ return ($1, $2, $3, $4, $5);
+ } else {
+ return undef;
+ }
+}
+
+sub parse_notify
+{
+ my ($line) = @_;
+
+ if ($line =~ /(\d+)\.(\d+):.*intel_engine_notify.*ring=(\d+), seqno=(\d+)/) {
+ return ($1, $2, $3, $4);
+ } else {
+ return undef;
+ }
+}
+
+sub parse_freq
+{
+ my ($line) = @_;
+
+ if ($line =~ /(\d+)\.(\d+):.*intel_gpu_freq_change.*new_freq=(\d+)/) {
+ return ($1, $2, $3);
+ } else {
+ return undef;
+ }
+}
+
+sub us
+{
+ my ($s, $us) = @_;
+
+ return $s * 1000000 + $us;
+}
+
+sub db_key
+{
+ my ($ring, $ctx, $seqno) = @_;
+
+ return $ring . '/' . $ctx . '/' . $seqno;
+}
+
+sub global_key
+{
+ my ($ring, $seqno) = @_;
+
+ return $ring . '/' . $seqno;
+}
+
+sub sanitize_ctx
+{
+ my ($ctx, $ring) = @_;
+
+ $ctx = $ctx - $ring if $squash_context_id;
+
+ if (exists $ctxdb{$ctx}) {
+ return $ctx . '.' . $ctxdb{$ctx};
+ } else {
+ return $ctx;
+ }
+}
+
+sub ts
+{
+ my ($us) = @_;
+ my ($h, $m, $s);
+
+ $s = int($us / 1000000);
+ $us = $us % 1000000;
+
+ $m = int($s / 60);
+ $s = $s % 60;
+
+ $h = int($m / 60);
+ $m = $m % 60;
+
+ return sprintf('2017-01-01 %02u:%02u:%02u.%06u', int($h), int($m), int($s), int($us));
+}
+
+# Main input loop - parse lines and build the internal representation of the
+# trace using a hash of requests and some auxilliary data structures.
+my $prev_freq = 0;
+my $prev_freq_ts = 0;
+my $oldkernelwa = 0;
+my ($no_queue, $no_in);
+while (<>) {
+ my ($s, $us, $ring, $ctx, $seqno, $global_seqno, $port);
+ my $freq;
+ my $key;
+
+ chomp;
+
+ ($s, $us, $ring, $ctx, $seqno) = parse_req_wait_begin($_);
+ if (defined $s) {
+ my %rw;
+
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ next if exists $reqwait{$key};
+
+ $rw{'key'} = $key;
+ $rw{'ring'} = $ring;
+ $rw{'seqno'} = $seqno;
+ $rw{'ctx'} = $ctx;
+ $rw{'start'} = us($s, $us);
+ $reqwait{$key} = \%rw;
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno) = parse_req($_, 'i915:i915_gem_request_wait_end');
+ if (defined $s) {
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ next unless exists $reqwait{$key};
+
+ $reqwait{$key}->{'end'} = us($s, $us);
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno) = parse_req($_, 'i915:i915_gem_request_add');
+ if (defined $s) {
+ my $orig_ctx = $ctx;
+
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ if (exists $queue{$key}) {
+ $ctxdb{$orig_ctx}++;
+ $ctx = sanitize_ctx($orig_ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+ }
+
+ $queue{$key} = us($s, $us);
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno) = parse_req($_, 'i915:i915_gem_request_submit');
+ if (defined $s) {
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ die if exists $submit{$key};
+ die unless exists $queue{$key};
+
+ $submit{$key} = us($s, $us);
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno, $port) = parse_req_hw($_, 'i915:i915_gem_request_in');
+ if (defined $s) {
+ my %req;
+
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ die if exists $db{$key};
+ if (not exists $queue{$key} and $oldkernelwa) {
+ $no_queue++;
+ next;
+ }
+ die unless exists $queue{$key};
+ die unless exists $submit{$key};
+
+ $req{'start'} = us($s, $us);
+ $req{'ring'} = $ring;
+ $req{'seqno'} = $seqno;
+ $req{'ctx'} = $ctx;
+ $req{'name'} = $ctx . '/' . $seqno;
+ $req{'global'} = $global_seqno;
+ $req{'port'} = $port;
+ $req{'queue'} = $queue{$key};
+ $req{'submit-delay'} = $submit{$key} - $queue{$key};
+ $req{'execute-delay'} = $req{'start'} - $submit{$key};
+ $rings{$ring} = $gid++ unless exists $rings{$ring};
+ $ringmap{$rings{$ring}} = $ring;
+ $db{$key} = \%req;
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno, $port) = parse_req($_, 'i915:i915_gem_request_out');
+ if (defined $s) {
+ my $gkey = global_key($ring, $global_seqno);
+
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ if (not exists $db{$key} and $oldkernelwa) {
+ $no_in++;
+ next;
+ }
+ die unless exists $db{$key};
+ die unless exists $db{$key}->{'start'};
+ die if exists $db{$key}->{'end'};
+
+ $db{$key}->{'end'} = us($s, $us);
+ if (exists $notify{$gkey}) {
+ $db{$key}->{'notify'} = $notify{$gkey};
+ } else {
+ # No notify so far. Maybe it will arrive later which
+ # will be handled in the sanitation loop below.
+ $db{$key}->{'notify'} = $db{$key}->{'end'};
+ $db{$key}->{'no-notify'} = 1;
+ }
+ $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
+ $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ next;
+ }
+
+ ($s, $us, $ring, $seqno) = parse_notify($_);
+ if (defined $s) {
+ next if exists $ignore_ring{$ring};
+ $notify{global_key($ring, $seqno)} = us($s, $us);
+ next;
+ }
+
+ ($s, $us, $freq) = parse_freq($_);
+ if (defined $s) {
+ my $cur = us($s, $us);
+
+ push @freqs, [$prev_freq_ts, $cur, $prev_freq] if $prev_freq;
+ $prev_freq_ts = $cur;
+ $prev_freq = $freq;
+ next;
+ }
+}
+
+# Sanitation pass to fixup up out of order notify and context complete, and to
+# fine the largest seqno to be used for timeline sorting purposes.
+my $max_seqno = 0;
+foreach my $key (keys %db) {
+ my $gkey = global_key($db{$key}->{'ring'}, $db{$key}->{'global'});
+
+ die unless exists $db{$key}->{'start'};
+
+ $max_seqno = $db{$key}->{'seqno'} if $db{$key}->{'seqno'} > $max_seqno;
+
+ unless (exists $db{$key}->{'end'}) {
+ # Context complete not received.
+ if (exists $notify{$gkey}) {
+ # No context complete due req merging - use notify.
+ $db{$key}->{'notify'} = $notify{$gkey};
+ $db{$key}->{'end'} = $db{$key}->{'notify'};
+ $db{$key}->{'no-end'} = 1;
+ } else {
+ # No notify and no context complete - mark it.
+ $db{$key}->{'no-end'} = 1;
+ $db{$key}->{'end'} = $db{$key}->{'start'} + 999;
+ $db{$key}->{'notify'} = $db{$key}->{'end'};
+ $db{$key}->{'incomplete'} = 1;
+ }
+
+ $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
+ $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ } else {
+ # Notify arrived after context complete.
+ if (exists $db{$key}->{'no-notify'} and exists $notify{$gkey}) {
+ delete $db{$key}->{'no-notify'};
+ $db{$key}->{'notify'} = $notify{$gkey};
+ $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
+ $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ }
+ }
+}
+
+# Fix up incompletes
+foreach my $key (keys %db) {
+ next unless exists $db{$key}->{'incomplete'};
+
+ # End the incomplete batch at the time next one starts
+ my $ring = $db{$key}->{'ring'};
+ my $ctx = $db{$key}->{'ctx'};
+ my $seqno = $db{$key}->{'seqno'};
+ my $next_key;
+ my $i = 1;
+
+ do {
+ $next_key = db_key($ring, $ctx, $seqno + $i);
+ $i++;
+ } until ((exists $db{$next_key} and not exists $db{$next_key}->{'incomplete'})
+ or $i > scalar(keys(%db))); # ugly stop hack
+
+ if (exists $db{$next_key}) {
+ $db{$key}->{'notify'} = $db{$next_key}->{'end'};
+ $db{$key}->{'end'} = $db{$key}->{'notify'};
+ $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
+ $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ }
+}
+
+# GPU time accounting
+my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, %batch_count);
+my (%submit_avg, %execute_avg, %ctxsave_avg);
+my $last_ts = 0;
+my $first_ts;
+
+foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+ my $ring = $db{$key}->{'ring'};
+ my $end = $db{$key}->{'end'};
+
+ $first_ts = $db{$key}->{'queue'} if not defined $first_ts or $db{$key}->{'queue'} < $first_ts;
+ $last_ts = $end if $end > $last_ts;
+
+ # Adjust batch start with legacy execlists.
+ # Port == 2 mean batch was merged udring queuing and hasn't actually
+ # been submitted to the gpu until the batch with port < 2 is found.
+ if ($correct_durations and $oldkernelwa and $db{$key}->{'port'} == 2) {
+ my $ctx = $db{$key}->{'ctx'};
+ my $seqno = $db{$key}->{'seqno'};
+ my $next_key;
+ my $i = 1;
+
+ do {
+ $next_key = db_key($ring, $ctx, $seqno + $i);
+ $i++;
+ } until ((exists $db{$next_key} and $db{$next_key}->{'port'} < 2) or $i > scalar(keys(%db))); # ugly stop hack
+
+ if (exists $db{$next_key}) {
+ $db{$key}->{'start'} = $db{$next_key}->{'start'};
+ $db{$key}->{'end'} = $db{$next_key}->{'end'};
+ die if $db{$key}->{'start'} > $db{$key}->{'end'};
+ }
+ }
+
+ $running{$ring} += $end - $db{$key}->{'start'} unless exists $db{$key}->{'no-end'};
+ $runnable{$ring} += $db{$key}->{'execute-delay'};
+ $queued{$ring} += $db{$key}->{'start'} - $db{$key}->{'execute-delay'} - $db{$key}->{'queue'};
+
+ $batch_count{$ring}++;
+
+ # correct duration of merged batches
+ if ($correct_durations and exists $db{$key}->{'no-end'}) {
+ my $start = $db{$key}->{'start'};
+ my $ctx = $db{$key}->{'ctx'};
+ my $seqno = $db{$key}->{'seqno'};
+ my $next_key;
+ my $i = 1;
+
+ do {
+ $next_key = db_key($ring, $ctx, $seqno + $i);
+ $i++;
+ } until (exists $db{$next_key} or $i > scalar(keys(%db))); # ugly stop hack
+
+ # 20us tolerance
+ if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+ $db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
+ $db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
+ $db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
+ $end = $db{$key}->{'notify'};
+ die if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
+ }
+ die if $db{$key}->{'start'} > $db{$key}->{'end'};
+ }
+ $batch_avg{$ring} += $db{$key}->{'duration'};
+ $batch_total_avg{$ring} += $end - $db{$key}->{'start'};
+
+ $submit_avg{$ring} += $db{$key}->{'submit-delay'};
+ $execute_avg{$ring} += $db{$key}->{'execute-delay'};
+ $ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
+}
+
+foreach my $ring (keys %batch_avg) {
+ $batch_avg{$ring} /= $batch_count{$ring};
+ $batch_total_avg{$ring} /= $batch_count{$ring};
+ $submit_avg{$ring} /= $batch_count{$ring};
+ $execute_avg{$ring} /= $batch_count{$ring};
+ $ctxsave_avg{$ring} /= $batch_count{$ring};
+}
+
+# Calculate engine idle time
+my %flat_busy;
+foreach my $gid (sort keys %rings) {
+ my $ring = $ringmap{$rings{$gid}};
+ my (@s_, @e_);
+
+ # Extract all GPU busy intervals and sort them.
+ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+ next unless $db{$key}->{'ring'} == $ring;
+ push @s_, $db{$key}->{'start'};
+ push @e_, $db{$key}->{'end'};
+ die if $db{$key}->{'start'} > $db{$key}->{'end'};
+ }
+
+ die unless $#s_ == $#e_;
+
+ # Flatten the intervals.
+ for my $i (1..$#s_) {
+ last if $i >= @s_; # End of array.
+ die if $e_[$i] < $s_[$i];
+ if ($s_[$i] <= $e_[$i - 1]) {
+ # Current entry overlaps with the previous one. We need
+ # to merge end of the previous interval from the list
+ # with the start of the current one.
+ splice @e_, $i - 1, 1;
+ splice @s_, $i, 1;
+ # Continue with the same element when list got squashed.
+ redo;
+ }
+ }
+
+ # Add up all busy times.
+ my $total = 0;
+ for my $i (0..$#s_) {
+ die if $e_[$i] < $s_[$i];
+
+ $total = $total + ($e_[$i] - $s_[$i]);
+ }
+
+ $flat_busy{$ring} = $total;
+}
+
+my %reqw;
+foreach my $key (keys %reqwait) {
+ $reqw{$reqwait{$key}->{'ring'}} += $reqwait{$key}->{'end'} - $reqwait{$key}->{'start'};
+}
+
+print <<ENDHTML if $html;
+<!DOCTYPE HTML>
+<html>
+<head>
+ <title>i915 GT timeline</title>
+
+ <style type="text/css">
+ body, html {
+ font-family: sans-serif;
+ }
+ </style>
+
+ <script src="node_modules/vis/dist/vis.js"></script>
+ <link href="node_modules/vis//dist/vis.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+
+<button onclick="toggleStackSubgroups()">Toggle stacking</button>
+
+<p>
+pink = requests executing on the GPU<br>
+grey = runnable requests waiting for a slot on GPU<br>
+blue = requests waiting on fences and dependencies before they are runnable<br>
+</p>
+<p>
+Boxes are in format 'ctx-id/seqno'.
+</p>
+<p>
+Use Ctrl+scroll-action to zoom-in/out and scroll-action or dragging to move around the timeline.
+</p>
+
+<div id="visualization"></div>
+
+<script type="text/javascript">
+ var container = document.getElementById('visualization');
+
+ var groups = new vis.DataSet([
+ENDHTML
+
+# var groups = new vis.DataSet([
+# {id: 1, content: 'g0'},
+# {id: 2, content: 'g1'}
+# ]);
+
+sub html_stats
+{
+ my ($stats, $group, $id) = @_;
+ my $name;
+
+ $name = 'Ring' . $group;
+ $name .= '<br><small><br>';
+ $name .= sprintf('%2.2f', $stats->{'idle'}) . '% idle<br><br>';
+ $name .= sprintf('%2.2f', $stats->{'busy'}) . '% busy<br>';
+ $name .= sprintf('%2.2f', $stats->{'runnable'}) . '% runnable<br>';
+ $name .= sprintf('%2.2f', $stats->{'queued'}) . '% queued<br><br>';
+ $name .= sprintf('%2.2f', $stats->{'wait'}) . '% wait<br><br>';
+ $name .= $stats->{'count'} . ' batches<br>';
+ $name .= sprintf('%2.2f', $stats->{'avg'}) . 'us avg batch<br>';
+ $name .= sprintf('%2.2f', $stats->{'total-avg'}) . 'us avg engine batch<br>';
+ $name .= '</small>';
+
+ print "\t{id: $id, content: '$name'},\n";
+}
+
+sub stdio_stats
+{
+ my ($stats, $group, $id) = @_;
+ my $str;
+
+ $str = 'Ring' . $group . ': ';
+ $str .= $stats->{'count'} . ' batches, ';
+ $str .= sprintf('%2.2f (%2.2f) avg batch us, ', $stats->{'avg'}, $stats->{'total-avg'});
+ $str .= sprintf('%2.2f', $stats->{'idle'}) . '% idle, ';
+ $str .= sprintf('%2.2f', $stats->{'busy'}) . '% busy, ';
+ $str .= sprintf('%2.2f', $stats->{'runnable'}) . '% runnable, ';
+ $str .= sprintf('%2.2f', $stats->{'queued'}) . '% queued, ';
+ $str .= sprintf('%2.2f', $stats->{'wait'}) . '% wait';
+ if ($avg_delay_stats) {
+ $str .= ', submit/execute/save-avg=(';
+ $str .= sprintf('%2.2f/%2.2f/%2.2f', $stats->{'submit'}, $stats->{'execute'}, $stats->{'save'});
+ }
+ $str .= ')';
+
+ say $str;
+}
+
+print "\t{id: 0, content: 'Freq'},\n" if $html;
+foreach my $group (sort keys %rings) {
+ my $name;
+ my $ring = $ringmap{$rings{$group}};
+ my $id = 1 + $rings{$group};
+ my $elapsed = $last_ts - $first_ts;
+ my %stats;
+
+ $stats{'idle'} = (1.0 - $flat_busy{$ring} / $elapsed) * 100.0;
+ $stats{'busy'} = $running{$ring} / $elapsed * 100.0;
+ $stats{'runnable'} = $runnable{$ring} / $elapsed * 100.0;
+ $stats{'queued'} = $queued{$ring} / $elapsed * 100.0;
+ $reqw{$ring} = 0 unless exists $reqw{$ring};
+ $stats{'wait'} = $reqw{$ring} / $elapsed * 100.0;
+ $stats{'count'} = $batch_count{$ring};
+ $stats{'avg'} = $batch_avg{$ring};
+ $stats{'total-avg'} = $batch_total_avg{$ring};
+ $stats{'submit'} = $submit_avg{$ring};
+ $stats{'execute'} = $execute_avg{$ring};
+ $stats{'save'} = $ctxsave_avg{$ring};
+
+ if ($html) {
+ html_stats(\%stats, $group, $id);
+ } else {
+ stdio_stats(\%stats, $group, $id);
+ }
+}
+
+exit 0 unless $html;
+
+print <<ENDHTML;
+ ]);
+
+ var items = new vis.DataSet([
+ENDHTML
+
+my $i = 0;
+foreach my $key (sort {$db{$a}->{'queue'} <=> $db{$b}->{'queue'}} keys %db) {
+ my ($name, $ctx, $seqno) = ($db{$key}->{'name'}, $db{$key}->{'ctx'}, $db{$key}->{'seqno'});
+ my ($queue, $start, $notify, $end) = ($db{$key}->{'queue'}, $db{$key}->{'start'}, $db{$key}->{'notify'}, $db{$key}->{'end'});
+ my $submit = $queue + $db{$key}->{'submit-delay'};
+ my ($content, $style);
+ my $group = 1 + $rings{$db{$key}->{'ring'}};
+ my $type = ' type: \'range\',';
+ my $startend;
+ my $skey;
+
+ # submit to execute
+ unless (exists $skip_box{'queue'}) {
+ $skey = 2 * $max_seqno * $ctx + 2 * $seqno;
+ $style = 'color: black; background-color: lightblue;';
+ $content = "$name<br>$db{$key}->{'submit-delay'}us <small>($db{$key}->{'execute-delay'}us)</small>";
+ $startend = 'start: \'' . ts($queue) . '\', end: \'' . ts($submit) . '\'';
+ print "\t{id: $i, key: $skey, $type group: $group, subgroup: 1, subgroupOrder: 1, content: '$content', $startend, style: \'$style\'},\n";
+ $i++;
+ }
+
+ # execute to start
+ unless (exists $skip_box{'ready'}) {
+ $skey = 2 * $max_seqno * $ctx + 2 * $seqno + 1;
+ $style = 'color: black; background-color: lightgrey;';
+ $content = "<small>$name<br>$db{$key}->{'execute-delay'}us</small>";
+ $startend = 'start: \'' . ts($submit) . '\', end: \'' . ts($start) . '\'';
+ print "\t{id: $i, key: $skey, $type group: $group, subgroup: 1, subgroupOrder: 2, content: '$content', $startend, style: \'$style\'},\n";
+ $i++;
+ }
+
+ # start to user interrupt
+ unless (exists $skip_box{'execute'}) {
+ $skey = -2 * $max_seqno * $ctx - 2 * $seqno - 1;
+ if (exists $db{$key}->{'incomplete'}) {
+ $style = 'color: white; background-color: red;';
+ } else {
+ $style = 'color: black; background-color: pink;';
+ }
+ $content = "$name <small>$db{$key}->{'port'}</small>";
+ $content .= ' <small><i>???</i></small> ' if exists $db{$key}->{'incomplete'};
+ $content .= ' <small><i>++</i></small> ' if exists $db{$key}->{'no-end'};
+ $content .= ' <small><i>+</i></small> ' if exists $db{$key}->{'no-notify'};
+ $content .= "<br>$db{$key}->{'duration'}us <small>($db{$key}->{'context-complete-delay'}us)</small>";
+ $startend = 'start: \'' . ts($start) . '\', end: \'' . ts($notify) . '\'';
+ print "\t{id: $i, key: $skey, $type group: $group, subgroup: 2, subgroupOrder: 3, content: '$content', $startend, style: \'$style\'},\n";
+ $i++;
+ }
+
+ # user interrupt to context complete
+ unless (exists $skip_box{'ctxsave'}) {
+ $skey = -2 * $max_seqno * $ctx - 2 * $seqno;
+ $style = 'color: black; background-color: orange;';
+ my $ctxsave = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ $content = "<small>$name<br>${ctxsave}us</small>";
+ $content .= ' <small><i>???</i></small> ' if exists $db{$key}->{'incomplete'};
+ $content .= ' <small><i>++</i></small> ' if exists $db{$key}->{'no-end'};
+ $content .= ' <small><i>+</i></small> ' if exists $db{$key}->{'no-notify'};
+ $startend = 'start: \'' . ts($notify) . '\', end: \'' . ts($end) . '\'';
+ print "\t{id: $i, key: $skey, $type group: $group, subgroup: 2, subgroupOrder: 4, content: '$content', $startend, style: \'$style\'},\n";
+ $i++;
+ }
+
+ $last_ts = $end;
+
+ last if $i > $max_items;
+}
+
+foreach my $item (@freqs) {
+ my ($start, $end, $freq) = @$item;
+ my $startend;
+
+ next if $start > $last_ts;
+
+ $start = $first_ts if $start < $first_ts;
+ $end = $last_ts if $end > $last_ts;
+ $startend = 'start: \'' . ts($start) . '\', end: \'' . ts($end) . '\'';
+ print "\t{id: $i, type: 'range', group: 0, content: '$freq', $startend},\n";
+ $i++;
+}
+
+my $end_ts = ts($first_ts + $width_us);
+$first_ts = ts($first_ts);
+
+print <<ENDHTML;
+ ]);
+
+ function customOrder (a, b) {
+ // order by id
+ return a.subgroupOrder - b.subgroupOrder;
+ }
+
+ // Configuration for the Timeline
+ var options = { groupOrder: 'content',
+ horizontalScroll: true,
+ stack: true,
+ stackSubgroups: false,
+ zoomKey: 'ctrlKey',
+ orientation: 'top',
+ order: customOrder,
+ start: '$first_ts',
+ end: '$end_ts'};
+
+ // Create a Timeline
+ var timeline = new vis.Timeline(container, items, groups, options);
+
+ function toggleStackSubgroups() {
+ options.stackSubgroups = !options.stackSubgroups;
+ timeline.setOptions(options);
+ }
+ENDHTML
+
+print <<ENDHTML;
+</script>
+</body>
+</html>
+ENDHTML