summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Eugeni Dodonov <eugeni.dodonov@intel.com> 2011-09-05 16:33:04 -0300
committer: Eugeni Dodonov <eugeni.dodonov@intel.com> 2011-09-05 19:13:58 -0300
commit: 431fe7803d37f344275fdaceb57c1fbaeee8541c (patch)
tree: 139b952bfed549f4a56a31d599ab168e303dd70d
parent: 4b0a15e45d1823888cfed9a76e54036658ab8e61 (diff)
intel_gpu_top: initialize monitoring statistics at startup
This patch initializes last_stats[] for the statistics registers before the monitoring itself starts. This way, the first measurement already contains the difference from the previous value instead of a difference from an uninitialized value. Signed-off-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
-rw-r--r-- man/intel_gpu_top.1 13
-rw-r--r-- tools/intel_gpu_top.c 220
2 files changed, 191 insertions, 42 deletions
diff --git a/man/intel_gpu_top.1 b/man/intel_gpu_top.1
index 2cbbec98..db2f362e 100644
--- a/man/intel_gpu_top.1
+++ b/man/intel_gpu_top.1
@@ -16,8 +16,21 @@ privilege to map the graphics device.
.B -s [samples per second]
number of samples to acquire per second
.TP
+.B -o [output file]
+run non-interactively and write usage statistics to [output file]
+.TP
+.B -e ["command to profile"]
+execute a command, and exit when it finishes. Note that the entire command,
+including all of its parameters, must be passed as a single quoted argument.
+.TP
.B -h
show usage notes
+.SH EXAMPLES
+.TP
+intel_gpu_top -o "cairo-trace-gvim.log" -s 100 -e "cairo-perf-trace /tmp/gvim"
+will run cairo-perf-trace with the /tmp/gvim trace, non-interactively, saving
+the statistics into the cairo-trace-gvim.log file, and collecting 100 samples
+per second.
.PP
Note that idle units are not
displayed, so an entirely idle GPU will only display the ring status and
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
index 2000c176..78167933 100644
--- a/tools/intel_gpu_top.c
+++ b/tools/intel_gpu_top.c
@@ -33,6 +33,8 @@
#include <err.h>
#include <sys/ioctl.h>
#include <sys/time.h>
+#include <sys/wait.h>
+#include <string.h>
#include "intel_gpu_tools.h"
#include "instdone.h"
@@ -373,24 +375,39 @@ static void ring_sample(struct ring *ring)
ring->full += full;
}
+static void ring_print_header(FILE *out, struct ring *ring)
+{
+ fprintf(out, "%.6s%%\tops\t",
+ ring->name
+ );
+}
+
static void ring_print(struct ring *ring, unsigned long samples_per_sec,
FILE *output)
{
int samples_to_percent_ratio, percent, len;
- if (!ring->size)
- return;
-
/* Calculate current value of samples_to_percent_ratio */
samples_to_percent_ratio = (ring->idle * 100) / samples_per_sec;
percent = 100 - samples_to_percent_ratio;
- len = fprintf(output, "%25s busy: %3d%%: ", ring->name, percent);
- print_percentage_bar (percent, len);
- fprintf(output, "%24s space: %d/%d (%d%%)\n",
- ring->name,
- (int)(ring->full / samples_per_sec),
- ring->size,
- (int)((ring->full / samples_to_percent_ratio) / ring->size));
+
+ if (output == stdout) {
+ if (!ring->size)
+ return;
+
+ len = fprintf(output, "%25s busy: %3d%%: ", ring->name, percent);
+ print_percentage_bar (percent, len);
+ fprintf(output, "%24s space: %d/%d (%d%%)\n",
+ ring->name,
+ (int)(ring->full / samples_per_sec),
+ ring->size,
+ (int)((ring->full / samples_to_percent_ratio) / ring->size));
+ } else {
+ fprintf(output, "%3d\t%d\t",
+ (ring->size) ? 100 - ring->idle / samples_to_percent_ratio : -1,
+ (ring->size) ? (int)(ring->full / samples_per_sec) : -1
+ );
+ }
}
static void
@@ -402,6 +419,7 @@ usage(const char *appname)
"\n"
"The following parameters apply:\n"
"[-s <samples>] samples per seconds (default %d)\n"
+ "[-o <file>] output to file (default to stdio)\n"
"[-h] show this help screen\n"
"\n",
appname,
@@ -429,17 +447,29 @@ int main(int argc, char **argv)
int i, ch;
int samples_per_sec = SAMPLES_PER_SEC;
FILE *output = stdout;
+ double elapsed_time=0;
+ int print_headers=1;
+ pid_t child_pid=-1;
+ int child_stat;
+ char *cmd=NULL;
/* Parse options? */
- while ((ch = getopt(argc, argv, "s:h")) != -1)
- {
+ while ((ch = getopt(argc, argv, "s:o:h")) != -1) {
switch (ch) {
+ case 'e': cmd = strdup(optarg);
+ break;
case 's': samples_per_sec = atoi(optarg);
if (samples_per_sec < 100) {
fprintf(stderr, "Error: samples per second must be >= 100\n");
exit(1);
}
break;
+ case 'o': output = fopen(optarg, "w");
+ if (!output) {
+ perror("fopen");
+ exit(1);
+ }
+ break;
case 'h':
usage(argv[0]);
exit(0);
@@ -454,6 +484,37 @@ int main(int argc, char **argv)
argc -= optind;
argv += optind;
+ /* Do we have a command to run? */
+ if (cmd != NULL)
+ {
+ if (output != stdout) {
+ fprintf(output, "# Profiling: %s\n", cmd);
+ fflush(output);
+ }
+ child_pid = fork();
+ if (child_pid < 0)
+ {
+ perror("fork");
+ exit(1);
+ }
+ else if (child_pid == 0) {
+ int res;
+ res = system(cmd);
+ free(cmd);
+ if (res < 0)
+ perror("running command");
+ if (output != stdout) {
+ fflush(output);
+ fprintf(output, "# %s exited with status %d\n", cmd, res);
+ fflush(output);
+ }
+ exit(0);
+ }
+ else {
+ free(cmd);
+ }
+ }
+
pci_dev = intel_get_pci_device();
devid = pci_dev->device_id;
intel_get_mmio(pci_dev);
@@ -473,9 +534,25 @@ int main(int argc, char **argv)
ring_init(&blt_ring);
}
+ /* Initialize GPU stats */
+ if (HAS_STATS_REGS(devid)) {
+ for (i = 0; i < STATS_COUNT; i++) {
+ uint32_t stats_high, stats_low, stats_high_2;
+
+ do {
+ stats_high = INREG(stats_regs[i] + 4);
+ stats_low = INREG(stats_regs[i]);
+ stats_high_2 = INREG(stats_regs[i] + 4);
+ } while (stats_high != stats_high_2);
+
+ last_stats[i] = (uint64_t)stats_high << 32 |
+ stats_low;
+ }
+ }
+
for (;;) {
int j;
- unsigned long long t1, ti, tf;
+ unsigned long long t1, ti, tf, t2;
unsigned long long def_sleep = 1000000 / samples_per_sec;
unsigned long long last_samples_per_sec = samples_per_sec;
char clear_screen[] = {0x1b, '[', 'H',
@@ -546,39 +623,82 @@ int main(int argc, char **argv)
if (max_lines >= num_instdone_bits)
max_lines = num_instdone_bits;
- fprintf(output, "%s", clear_screen);
-
- print_clock_info(pci_dev);
-
- ring_print(&render_ring, last_samples_per_sec, output);
- ring_print(&bsd_ring, last_samples_per_sec, output);
- ring_print(&bsd6_ring, last_samples_per_sec, output);
- ring_print(&blt_ring, last_samples_per_sec, output);
-
- fprintf(output, "\n%30s %s\n", "task", "percent busy");
- for (i = 0; i < max_lines; i++) {
- if (top_bits_sorted[i]->count > 0) {
- percent = (top_bits_sorted[i]->count * 100) /
- last_samples_per_sec;
- len = fprintf(output, "%30s: %3d%%: ",
- top_bits_sorted[i]->bit->name,
- percent);
- print_percentage_bar (percent, len);
- } else {
- fprintf(output, "%*s", PERCENTAGE_BAR_END, "");
+ t2 = gettime();
+ elapsed_time += (t2 - t1) / 1000000.0;
+
+ if (output == stdout) {
+ fprintf(output, "%s", clear_screen);
+ print_clock_info(pci_dev);
+
+ ring_print(&render_ring, last_samples_per_sec, output);
+ ring_print(&bsd_ring, last_samples_per_sec, output);
+ ring_print(&bsd6_ring, last_samples_per_sec, output);
+ ring_print(&blt_ring, last_samples_per_sec, output);
+
+ fprintf(output, "\n%30s %s\n", "task", "percent busy");
+ for (i = 0; i < max_lines; i++) {
+ if (top_bits_sorted[i]->count > 0) {
+ percent = (top_bits_sorted[i]->count * 100) /
+ last_samples_per_sec;
+ len = fprintf(output, "%30s: %3d%%: ",
+ top_bits_sorted[i]->bit->name,
+ percent);
+ print_percentage_bar (percent, len);
+ } else {
+ fprintf(output, "%*s", PERCENTAGE_BAR_END, "");
+ }
+
+ if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
+ fprintf(output, "%13s: %llu (%lld/sec)",
+ stats_reg_names[i],
+ stats[i],
+ stats[i] - last_stats[i]);
+ last_stats[i] = stats[i];
+ } else {
+ if (!top_bits_sorted[i]->count)
+ break;
+ }
+ fprintf(output, "\n");
+ }
+ } else {
+ /* Print headers for columns at first run */
+ if (print_headers) {
+ fprintf(output, "# time\t");
+ ring_print_header(output, &render_ring);
+ ring_print_header(output, &bsd_ring);
+ ring_print_header(output, &bsd6_ring);
+ ring_print_header(output, &blt_ring);
+ for (i = 0; i < MAX_NUM_TOP_BITS; i++) {
+ if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
+ fprintf(output, "%.6s\t",
+ stats_reg_names[i]
+ );
+ }
+ if (!top_bits[i].count)
+ continue;
+ }
+ fprintf(output, "\n");
+ print_headers = 0;
}
- if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
- fprintf(output, "%13s: %llu (%lld/sec)",
- stats_reg_names[i],
- stats[i],
- stats[i] - last_stats[i]);
- last_stats[i] = stats[i];
- } else {
- if (!top_bits_sorted[i]->count)
- break;
+ /* Print statistics */
+ fprintf(output, "%.2f\t", elapsed_time);
+ ring_print(&render_ring, last_samples_per_sec, output);
+ ring_print(&bsd_ring, last_samples_per_sec, output);
+ ring_print(&bsd6_ring, last_samples_per_sec, output);
+ ring_print(&blt_ring, last_samples_per_sec, output);
+
+ for (i = 0; i < MAX_NUM_TOP_BITS; i++) {
+ if (i < STATS_COUNT && HAS_STATS_REGS(devid)) {
+ fprintf(output, "%lu\t",
+ stats[i] - last_stats[i]);
+ last_stats[i] = stats[i];
+ }
+ if (!top_bits[i].count)
+ continue;
}
fprintf(output, "\n");
+ fflush(output);
}
for (i = 0; i < num_instdone_bits; i++) {
@@ -587,7 +707,23 @@ int main(int argc, char **argv)
if (i < STATS_COUNT)
last_stats[i] = stats[i];
}
+
+ /* Check if child has gone */
+ if (child_pid > 0)
+ {
+ int res;
+ if ((res = waitpid(child_pid, &child_stat, WNOHANG)) == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ if (res == 0)
+ continue;
+ if (WIFEXITED(child_stat))
+ break;
+ }
}
+ fclose(output);
+
return 0;
}