diff options
| -rw-r--r-- | overlay/gpu-perf.c | 38 | ||||
| -rw-r--r-- | overlay/gpu-perf.h | 4 | ||||
| -rw-r--r-- | overlay/overlay.c | 45 | 
3 files changed, 76 insertions, 11 deletions
diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c
index fc215634..1d35da50 100644
--- a/overlay/gpu-perf.c
+++ b/overlay/gpu-perf.c
@@ -40,10 +40,12 @@
 
 #if defined(__i386__)
 #define rmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb()           asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 #endif
 
 #if defined(__x86_64__)
 #define rmb()           asm volatile("lfence" ::: "memory")
+#define wmb()           asm volatile("sfence" ::: "memory")
 #endif
 
 #define N_PAGES 32
@@ -228,6 +230,26 @@ static int flip_complete(struct gpu_perf *gp, const void *event)
 	return 1;
 }
 
+/*
+ * Tracepoint handler registered for i915_gem_ring_switch_context: bump the
+ * per-ring counter selected by raw[1] of the sample (used here as ring index).
+ *
+ * Returns 1 when a counter was incremented, 0 when the index does not fit
+ * gp->ctx_switch[] and the sample is ignored; process_sample() reports this.
+ */
+static int ctx_switch(struct gpu_perf *gp, const void *event)
+{
+	const struct sample_event *sample = event;
+	const unsigned ring = sample->raw[1];
+
+	/* The index comes straight from the trace record; never write past the array. */
+	if (ring >= sizeof(gp->ctx_switch) / sizeof(gp->ctx_switch[0]))
+		return 0;
+
+	gp->ctx_switch[ring]++;
+	return 1;
+}
+
 static int ring_sync(struct gpu_perf *gp, const void *event)
 {
 	const struct sample_event *sample = event;
@@ -293,6 +315,7 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags)
 		perf_tracepoint_open(gp, "i915", "i915_gem_request_wait_end", wait_end);
 	perf_tracepoint_open(gp, "i915", "i915_flip_complete", flip_complete);
 	perf_tracepoint_open(gp, "i915", "i915_gem_ring_sync_to", ring_sync);
+	perf_tracepoint_open(gp, "i915", "i915_gem_ring_switch_context", ctx_switch);
 
 	if (gp->nr_events == 0) {
 		gp->error = "i915.ko tracepoints not available";
@@ -303,20 +326,19 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags)
 		return;
 }
 
-static int process_sample(struct gpu_perf *gp,
+static int process_sample(struct gpu_perf *gp, int cpu,
 			  const struct perf_event_header *header)
 {
 	const struct sample_event *sample = (const struct sample_event *)header;
 	int n, update = 0;
 
 	/* hash me! */
-	for (n = 0; n < gp->nr_cpus * gp->nr_events; n++) {
-		if (gp->sample[n].id != sample->id)
+	for (n = 0; n < gp->nr_events; n++) {
+		int m = n * gp->nr_cpus + cpu;
+		if (gp->sample[m].id != sample->id)
 			continue;
 
-		update = 1;
-		if (gp->sample[n].func)
-			update = gp->sample[n].func(gp, sample);
+		update = gp->sample[m].func(gp, sample);
 		break;
 	}
 
@@ -380,13 +402,15 @@ int gpu_perf_update(struct gpu_perf *gp)
 			}
 
 			if (header->type == PERF_RECORD_SAMPLE)
-				update += process_sample(gp, header);
+				update += process_sample(gp, n, header);
 			tail += header->size;
 		}
 
 		if (wrap)
 			tail &= mask;
+		/* Barrier goes before the tail store: finish with the records before the kernel may reuse them. */
+		wmb();
 		mmap->data_tail = tail;
 	}
 
 	free(buffer);
diff --git a/overlay/gpu-perf.h b/overlay/gpu-perf.h
index 395eb8af..fae60bcb 100644
--- a/overlay/gpu-perf.h
+++ b/overlay/gpu-perf.h
@@ -41,7 +41,9 @@ struct gpu_perf {
 		int (*func)(struct gpu_perf *, const void *);
 	} *sample;
 
-	int flip_complete[4];
+	unsigned flip_complete[MAX_RINGS];
+	unsigned ctx_switch[MAX_RINGS];
+
 	struct gpu_perf_comm {
 		struct gpu_perf_comm *next;
 		char name[256];
diff --git a/overlay/overlay.c b/overlay/overlay.c
index 65137864..3fddde94 100644
--- a/overlay/overlay.c
+++ b/overlay/overlay.c
@@ -102,6 +102,7 @@ struct overlay_gpu_top {
 
 struct overlay_gpu_perf {
 	struct gpu_perf gpu_perf;
+	time_t show_ctx;
 };
 
 struct overlay_gpu_freq {
@@ -127,6 +128,8 @@ struct overlay_context {
 	cairo_t *cr;
 	int width, height;
 
+	time_t time;
+
 	struct overlay_gpu_top gpu_top;
 	struct overlay_gpu_perf gpu_perf;
 	struct overlay_gpu_freq gpu_freq;
@@ -270,6 +273,8 @@ static void init_gpu_perf(struct overlay_context *ctx,
 			  struct overlay_gpu_perf *gp)
 {
 	gpu_perf_init(&gp->gpu_perf, 0);
+
+	gp->show_ctx = 0;
 }
 
 static char *get_comm(pid_t pid, char *comm, int len)
@@ -310,6 +315,17 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
 	char buf[1024];
 	cairo_pattern_t *linear;
 	int x, y, y1, y2, n;
+	int has_ctx = 0;
+
+	gpu_perf_update(&gp->gpu_perf);
+
+	/* has_ctx = 1 + highest index in ctx_switch[] with a non-zero count, or 0 if none. */
+	for (n = MAX_RINGS; n > 0; n--) {
+		if (gp->gpu_perf.ctx_switch[n-1]) {
+			has_ctx = n;
+			break;
+		}
+	}
 
 	cairo_rectangle(ctx->cr, ctx->width/2+HALF_PAD-.5, PAD-.5, ctx->width/2-SIZE_PAD+1, ctx->height/2-SIZE_PAD+1);
 	cairo_set_source_rgb(ctx->cr, .15, .15, .15);
@@ -326,12 +342,9 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
 		return;
 	}
 
-	gpu_perf_update(&gp->gpu_perf);
-
 	y = PAD + 12 - 2;
 	x = ctx->width/2 + HALF_PAD;
 
-
 	for (comm = gp->gpu_perf.comm; comm; comm = comm->next) {
 		int total;
 
@@ -369,6 +382,8 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
 		chart_draw(comm->user_data, ctx->cr);
 		y2 += 14;
 	}
+	if (has_ctx || gp->show_ctx)
+		y2 += 14;
 
 	y1 += -12 - 2;
 	y2 += 14 - 14 + 4;
@@ -465,6 +480,28 @@ skip_comm:
 	cairo_move_to(ctx->cr, x, y);
 	cairo_show_text(ctx->cr, buf);
 	y += 14;
+
+	/* Context-switch line: stays up as "Contexts: 0" until more than 10s after the last switch. */
+	cairo_set_source_rgba(ctx->cr, 1, 1, 1, 1);
+	cairo_move_to(ctx->cr, x, y);
+	if (has_ctx) {
+		int len = sprintf(buf, "Contexts:");
+		for (n = 0; n < has_ctx; n++)
+			len += sprintf(buf + len, "%s %u",
+				       n ? "," : "",
+				       gp->gpu_perf.ctx_switch[n]);
+
+		memset(gp->gpu_perf.ctx_switch, 0, sizeof(gp->gpu_perf.ctx_switch));
+		gp->show_ctx = ctx->time;
+
+		cairo_show_text(ctx->cr, buf);
+		y += 14;
+	} else if (gp->show_ctx) {
+		cairo_show_text(ctx->cr, "Contexts: 0");
+		y += 14;
+		if (ctx->time - gp->show_ctx > 10)
+			gp->show_ctx = 0;
+	}
 }
 
 static void init_gpu_freq(struct overlay_context *ctx,
@@ -841,6 +878,8 @@ int main(int argc, char **argv)
 
 	i = 0;
 	while (1) {
+		ctx.time = time(NULL);
+
 		ctx.cr = cairo_create(ctx.surface);
 		cairo_set_operator(ctx.cr, CAIRO_OPERATOR_CLEAR);
 		cairo_paint(ctx.cr);
