/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "igt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <ftw.h>
#include <inttypes.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <errno.h>
#include <limits.h>
#include <assert.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <time.h>

#include "drm.h"

#include <linux/unistd.h>

#define sigev_notify_thread_id _sigev_un._tid

static volatile int done;

struct gem_busyspin {
	pthread_t thread;
	unsigned long sz;
	unsigned long count;
	bool leak;
	bool interrupts;
};

struct sys_wait {
	pthread_t thread;
	struct igt_mean mean;
};

static void force_low_latency(void)
{
	int32_t target = 0;
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0 || write(fd, &target, sizeof(target)) < 0)
		fprintf(stderr,
			"Unable to prevent CPU sleeps and force low latency using /dev/cpu_dma_latency: %s\n",
			strerror(errno));
}

#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

#define LOCAL_I915_EXEC_BSD_SHIFT	(13)
#define LOCAL_I915_EXEC_BSD_MASK	(3 << LOCAL_I915_EXEC_BSD_SHIFT)

#define ENGINE_FLAGS	(I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

static bool ignore_engine(int fd, unsigned engine)
{
	if (engine == 0)
		return true;

	if (gem_has_bsd2(fd) && engine == I915_EXEC_BSD)
		return true;

	return false;
}
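/*
 * GPU load generator: submits a tiny nop batch to every usable engine in a
 * randomised order, as fast as possible, until "done" is set. When
 * bs->interrupts is set, a second written object is attached to each batch;
 * when bs->leak is set, the old batch is marked I915_MADV_DONTNEED and a
 * fresh one is created on every pass to stress memory management.
 */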
static void *gem_busyspin(void *arg)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct gem_busyspin *bs = arg;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	const unsigned sz =
		bs->sz ? bs->sz + sizeof(bbe) : bs->leak ? 16 << 20 : 4 << 10;
	unsigned engines[16];
	unsigned nengine;
	unsigned engine;
	int fd;

	fd = drm_open_driver(DRIVER_INTEL);

	nengine = 0;
	for_each_engine(fd, engine)
		if (!ignore_engine(fd, engine))
			engines[nengine++] = engine;

	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	obj[0].flags = EXEC_OBJECT_WRITE;
	obj[1].handle = gem_create(fd, sz);
	gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	if (bs->interrupts) {
		execbuf.buffers_ptr = (uintptr_t)&obj[0];
		execbuf.buffer_count = 2;
	} else {
		execbuf.buffers_ptr = (uintptr_t)&obj[1];
		execbuf.buffer_count = 1;
	}
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}

	while (!done) {
		for (int n = 0; n < nengine; n++) {
			const int m = rand() % nengine;
			unsigned int tmp = engines[n];

			engines[n] = engines[m];
			engines[m] = tmp;
		}

		for (int n = 0; n < nengine; n++) {
			execbuf.flags &= ~ENGINE_FLAGS;
			execbuf.flags |= engines[n];
			gem_execbuf(fd, &execbuf);
		}
		bs->count += nengine;

		if (bs->leak) {
			gem_madvise(fd, obj[1].handle, I915_MADV_DONTNEED);
			obj[1].handle = gem_create(fd, sz);
			gem_write(fd, obj[1].handle, bs->sz, &bbe, sizeof(bbe));
		}
	}

	close(fd);
	return NULL;
}

static double elapsed(const struct timespec *a, const struct timespec *b)
{
	return 1e9*(b->tv_sec - a->tv_sec) + (b->tv_nsec - a->tv_nsec);
}

static void *sys_wait(void *arg)
{
	struct sys_wait *w = arg;
	struct sigevent sev;
	timer_t timer;
	sigset_t mask;
	struct timespec now;
#define SIG SIGRTMIN

	sigemptyset(&mask);
	sigaddset(&mask, SIG);
	sigprocmask(SIG_SETMASK, &mask, NULL);

	sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
	sev.sigev_notify_thread_id = gettid();
	sev.sigev_signo = SIG;
	timer_create(CLOCK_MONOTONIC, &sev, &timer);

	clock_gettime(CLOCK_MONOTONIC, &now);
	while (!done) {
		struct itimerspec its;
		int sigs;

		its.it_value = now;
		its.it_value.tv_nsec += 100 * 1000;
		its.it_value.tv_nsec += rand() % (NSEC_PER_SEC / 1000);
		if (its.it_value.tv_nsec >= NSEC_PER_SEC) {
			its.it_value.tv_nsec -= NSEC_PER_SEC;
			its.it_value.tv_sec += 1;
		}
		its.it_interval.tv_sec = its.it_interval.tv_nsec = 0;
		timer_settime(timer, TIMER_ABSTIME, &its, NULL);

		sigwait(&mask, &sigs);
		clock_gettime(CLOCK_MONOTONIC, &now);

		igt_mean_add(&w->mean, elapsed(&its.it_value, &now));
	}

	sigprocmask(SIG_UNBLOCK, &mask, NULL);
	timer_delete(timer);

	return NULL;
}

#define PAGE_SIZE 4096
static void *sys_thp_alloc(void *arg)
{
	struct sys_wait *w = arg;
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	while (!done) {
		const size_t sz = 2 << 20;
		const struct timespec start = now;
		void *ptr;

		ptr = mmap(NULL, sz, PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		assert(ptr != MAP_FAILED);
		madvise(ptr, sz, MADV_HUGEPAGE);
		for (size_t page = 0; page < sz; page += PAGE_SIZE)
			*(volatile uint32_t *)((unsigned char *)ptr + page) = 0;
		munmap(ptr, sz);

		clock_gettime(CLOCK_MONOTONIC, &now);
		igt_mean_add(&w->mean, elapsed(&start, &now));
	}

	return NULL;
}

static void bind_cpu(pthread_attr_t *attr, int cpu)
{
#ifdef __USE_GNU
	cpu_set_t mask;

	if (cpu == -1)
		return;

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	pthread_attr_setaffinity_np(attr, sizeof(mask), &mask);
#endif
}

static void rtprio(pthread_attr_t *attr, int prio)
{
#ifdef PTHREAD_EXPLICIT_SCHED
	struct sched_param param = { .sched_priority = 99 };

	pthread_attr_setinheritsched(attr, PTHREAD_EXPLICIT_SCHED);
	pthread_attr_setschedpolicy(attr, SCHED_FIFO);
	pthread_attr_setschedparam(attr, &param);
#endif
}
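/*
 * Pick a location estimator suited to the sample size: with enough samples
 * the trimean is robust against outliers, with only a handful the median is
 * preferred, and with very few we fall back to the plain mean.
 */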
static double l_estimate(igt_stats_t *stats)
{
	if (stats->n_values > 9)
		return igt_stats_get_trimean(stats);
	else if (stats->n_values > 5)
		return igt_stats_get_median(stats);
	else
		return igt_stats_get_mean(stats);
}

static double min_measurement_error(void)
{
	struct timespec start, end;
	int n;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (n = 0; n < 1024; n++)
		clock_gettime(CLOCK_MONOTONIC, &end);

	return elapsed(&start, &end) / n;
}

static int print_entry(const char *filepath, const struct stat *info,
		       const int typeflag, struct FTW *pathinfo)
{
	int fd;

	fd = open(filepath, O_RDONLY);
	if (fd != -1) {
		void *ptr;

		ptr = mmap(NULL, info->st_size,
			   PROT_READ, MAP_SHARED | MAP_POPULATE, fd, 0);
		if (ptr != MAP_FAILED)
			munmap(ptr, info->st_size);

		close(fd);
	}

	return 0;
}

static void *background_fs(void *path)
{
	while (1)
		nftw(path, print_entry, 20, FTW_PHYS | FTW_MOUNT);
	return NULL;
}

static unsigned long calibrate_nop(unsigned int target_us,
				   unsigned int tolerance_pct)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	const unsigned int loops = 100;
	struct drm_i915_gem_exec_object2 obj = {};
	struct drm_i915_gem_execbuffer2 eb = {
		.buffer_count = 1, .buffers_ptr = (uintptr_t)&obj
	};
	struct timespec t_0, t_end;
	long sz, prev;
	int fd;

	fd = drm_open_driver(DRIVER_INTEL);

	clock_gettime(CLOCK_MONOTONIC, &t_0);

	sz = 256 * 1024;
	do {
		struct timespec t_start;

		obj.handle = gem_create(fd, sz + sizeof(bbe));
		gem_write(fd, obj.handle, sz, &bbe, sizeof(bbe));
		gem_execbuf(fd, &eb);
		gem_sync(fd, obj.handle);

		clock_gettime(CLOCK_MONOTONIC, &t_start);
		for (int loop = 0; loop < loops; loop++)
			gem_execbuf(fd, &eb);
		gem_sync(fd, obj.handle);
		clock_gettime(CLOCK_MONOTONIC, &t_end);

		gem_close(fd, obj.handle);

		prev = sz;
		sz = loops * sz / elapsed(&t_start, &t_end) * 1e3 * target_us;
		sz = ALIGN(sz, sizeof(uint32_t));
	} while (elapsed(&t_0, &t_end) < 5 ||
		 abs(sz - prev) > (sz * tolerance_pct / 100));

	close(fd);

	return sz;
}
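/*
 * gem_syslatency: measure the impact of i915 activity on system latency.
 *
 * One busy-spin thread per CPU hammers the GPU with batches while an
 * RT-priority waiter per CPU records how late its timer signals arrive
 * (or, with -m, how long THP allocations take). Options:
 *   -t seconds  runtime of the benchmark
 *   -r us       duration of each nop batch (calibrated at startup)
 *   -n          dry run: skip the GPU load and measure the baseline
 *   -i          add a written buffer to each batch ("interrupts ahoy!")
 *   -b          walk and mmap the filesystem in the background
 *   -m          measure THP allocation latency instead of timer latency
 *               (the GPU load also leaks and recreates its batches)
 *   -1          use a single CPU only
 *   -f field    print a single output field
 */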
int main(int argc, char **argv)
{
	struct gem_busyspin *busy;
	struct sys_wait *wait;
	void *sys_fn = sys_wait;
	pthread_attr_t attr;
	pthread_t bg_fs = 0;
	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	igt_stats_t cycles, mean, max;
	double min;
	int time = 10;
	int field = -1;
	int enable_gem_sysbusy = 1;
	bool leak = false;
	bool interrupts = false;
	long batch = 0;
	int n, c;

	while ((c = getopt(argc, argv, "r:t:f:bmni1")) != -1) {
		switch (c) {
		case '1':
			ncpus = 1;
			break;

		case 'n': /* dry run, measure baseline system latency */
			enable_gem_sysbusy = 0;
			break;

		case 'i': /* interrupts ahoy! */
			interrupts = true;
			break;

		case 't':
			/* How long to run the benchmark for (seconds) */
			time = atoi(optarg);
			if (time < 0)
				time = INT_MAX;
			break;

		case 'r': /* Duration of each batch (microseconds) */
			batch = atoi(optarg);
			break;

		case 'f': /* Select an output field */
			field = atoi(optarg);
			break;

		case 'b':
			pthread_create(&bg_fs, NULL,
				       background_fs, (void *)"/");
			sleep(5);
			break;

		case 'm':
			sys_fn = sys_thp_alloc;
			leak = true;
			break;

		default:
			break;
		}
	}

	/* Prevent CPU sleeps so that busy and idle loads are consistent. */
	force_low_latency();
	min = min_measurement_error();

	if (batch > 0)
		batch = calibrate_nop(batch, 2);
	else
		batch = -batch;

	busy = calloc(ncpus, sizeof(*busy));
	pthread_attr_init(&attr);
	if (enable_gem_sysbusy) {
		for (n = 0; n < ncpus; n++) {
			bind_cpu(&attr, n);
			busy[n].sz = batch;
			busy[n].leak = leak;
			busy[n].interrupts = interrupts;
			pthread_create(&busy[n].thread, &attr,
				       gem_busyspin, &busy[n]);
		}
	}

	wait = calloc(ncpus, sizeof(*wait));
	pthread_attr_init(&attr);
	rtprio(&attr, 99);
	for (n = 0; n < ncpus; n++) {
		igt_mean_init(&wait[n].mean);
		bind_cpu(&attr, n);
		pthread_create(&wait[n].thread, &attr, sys_fn, &wait[n]);
	}

	sleep(time);
	done = 1;

	igt_stats_init_with_size(&cycles, ncpus);
	if (enable_gem_sysbusy) {
		for (n = 0; n < ncpus; n++) {
			pthread_join(busy[n].thread, NULL);
			igt_stats_push(&cycles, busy[n].count);
		}
	}

	igt_stats_init_with_size(&mean, ncpus);
	igt_stats_init_with_size(&max, ncpus);
	for (n = 0; n < ncpus; n++) {
		pthread_join(wait[n].thread, NULL);
		igt_stats_push_float(&mean, wait[n].mean.mean);
		igt_stats_push_float(&max, wait[n].mean.max);
	}

	if (bg_fs) {
		pthread_cancel(bg_fs);
		pthread_join(bg_fs, NULL);
	}

	switch (field) {
	default:
		printf("gem_syslatency: cycles=%.0f, latency mean=%.3fus max=%.0fus\n",
		       igt_stats_get_mean(&cycles),
		       (igt_stats_get_mean(&mean) - min) / 1000,
		       (l_estimate(&max) - min) / 1000);
		break;
	case 0:
		printf("%.0f\n", igt_stats_get_mean(&cycles));
		break;
	case 1:
		printf("%.3f\n", (igt_stats_get_mean(&mean) - min) / 1000);
		break;
	case 2:
		printf("%.0f\n", (l_estimate(&max) - min) / 1000);
		break;
	}

	return 0;
}