From 72d69c2a4ee1fe2a66c3296ad42e60d563ba9a36 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 10 Aug 2020 08:21:59 +0200 Subject: perf bench numa: Fix number of processes in "2x3-convergence" test Signed-off-by: Alexander Gordeev Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/d949f5f48e17fc816f3beecf8479f1b2480345e4.1597004831.git.agordeev@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/bench/numa.c') diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 5797253b9700..3aad1fc6cbca 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1755,7 +1755,7 @@ static const char *tests[][MAX_ARGS] = { { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV }, { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV }, { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV }, - { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, + { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV }, { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV }, { " 4x4-convergence-NOTHP,", -- cgit v1.2.3 From 85372c6974aad0e40b97513434694abe84c1017e Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 10 Aug 2020 08:22:00 +0200 Subject: perf bench numa: Fix benchmark names Standard benchmark names let users know the test specifics. For example "2x1-bw-process" name tells that two processes, one thread each, are run and the RAM bandwidth is measured. Several benchmark names do not correspond to their actual running configuration. Fix that and also some whitespace and comment inconsistencies.
Signed-off-by: Alexander Gordeev Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/6b6f2084f132ee8e9203dc7c32f9deb209b87a68.1597004831.git.agordeev@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'tools/perf/bench/numa.c') diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 3aad1fc6cbca..31e2601d39c8 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -813,12 +813,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val } } } else if (!g->p.data_backwards || (nr + loop) & 1) { + /* Process data forwards: */ d0 = data + off; d = data + off + 1; d1 = data + words; - /* Process data forwards: */ for (;;) { if (unlikely(d >= d1)) d = data; @@ -836,7 +836,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val d = data + off - 1; d1 = data + words; - /* Process data forwards: */ for (;;) { if (unlikely(d < data)) d = data + words-1; @@ -1733,12 +1732,12 @@ err: */ static const char *tests[][MAX_ARGS] = { /* Basic single-stream NUMA bandwidth measurements: */ - { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", + { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "0", OPT_BW_RAM }, { "RAM-bw-local-NOTHP,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP }, - { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", + { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", "-C" , "0", "-M", "1", OPT_BW_RAM }, /* 2-stream NUMA bandwidth measurements: */ @@ -1780,24 +1779,24 @@ static const char *tests[][MAX_ARGS] = { "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP }, { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW }, - { " 4x1-bw-thread,", "mem", "-p", 
"1", "-t", "4", "-T", "256", OPT_BW }, - { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, - { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, - { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, + { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, + { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, + { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, + { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, - { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, - { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, - { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, - { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, - { " 4x8-bw-thread-NOTHP,", + { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, + { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, + { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, + { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, + { " 4x8-bw-process-NOTHP,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP }, - { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, - { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, + { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, + { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, - { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, - { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, + { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, + { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, - { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, + { "numa02-bw,", "mem", "-p", 
"1", "-t", "32", "-T", "32", OPT_BW }, { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP }, { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW }, { "numa01-bw-thread-NOTHP,", -- cgit v1.2.3 From 509f68e327d0c87e9bc93cb138e445c506ae9ce9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 13 Aug 2020 13:30:42 +0200 Subject: perf bench numa: Fix cpumask memory leak in node_has_cpus() Couple numa_allocate_cpumask() and numa_free_cpumask() functions Signed-off-by: Alexander Gordeev Reviewed-by: Srikar Dronamraju Cc: Alexander Shishkin Cc: Balamuruhan S Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Satheesh Rajendran Link: http://lore.kernel.org/lkml/20200813113041.GA1685@oc3871087118.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools/perf/bench/numa.c') diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 31e2601d39c8..9066511aed47 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -248,16 +248,21 @@ static int is_node_present(int node) static bool node_has_cpus(int node) { struct bitmask *cpu = numa_allocate_cpumask(); + bool ret = false; /* fall back to nocpus */ unsigned int i; - if (cpu && !numa_node_to_cpus(node, cpu)) { + BUG_ON(!cpu); + if (!numa_node_to_cpus(node, cpu)) { for (i = 0; i < cpu->size; i++) { - if (numa_bitmask_isbitset(cpu, i)) - return true; + if (numa_bitmask_isbitset(cpu, i)) { + ret = true; + break; + } } } + numa_free_cpumask(cpu); - return false; /* lets fall back to nocpus safely */ + return ret; } static cpu_set_t bind_to_cpu(int target_cpu) -- cgit v1.2.3 From 2db13a9b30f7e438777eb1a462c4b055ba948b89 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 13 Aug 2020 13:32:48 +0200 Subject: perf bench numa: Use numa_node_to_cpus() to bind tasks to nodes It is currently assumed that each node 
contains at most nr_cpus/nr_nodes CPUs and nodes' CPU ranges do not overlap. That assumption is generally incorrect as there are archs where a CPU number does not depend on its node number. This update removes the described assumption by simply calling the numa_node_to_cpus() interface and using the returned mask for binding CPUs to nodes. Also, variable types and names are made consistent in functions using cpumask. Signed-off-by: Alexander Gordeev Reviewed-by: Srikar Dronamraju Cc: Alexander Shishkin Cc: Balamuruhan S Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Satheesh Rajendran Link: http://lore.kernel.org/lkml/20200813113247.GA2014@oc3871087118.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'tools/perf/bench/numa.c') diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 9066511aed47..6d5c890478cb 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -247,20 +247,20 @@ static int is_node_present(int node) */ static bool node_has_cpus(int node) { - struct bitmask *cpu = numa_allocate_cpumask(); + struct bitmask *cpumask = numa_allocate_cpumask(); bool ret = false; /* fall back to nocpus */ - unsigned int i; + int cpu; - BUG_ON(!cpu); - if (!numa_node_to_cpus(node, cpu)) { - for (i = 0; i < cpu->size; i++) { - if (numa_bitmask_isbitset(cpu, i)) { + BUG_ON(!cpumask); + if (!numa_node_to_cpus(node, cpumask)) { + for (cpu = 0; cpu < (int)cpumask->size; cpu++) { + if (numa_bitmask_isbitset(cpumask, cpu)) { ret = true; break; } } } - numa_free_cpumask(cpu); + numa_free_cpumask(cpumask); return ret; } @@ -293,14 +293,10 @@ static cpu_set_t bind_to_cpu(int target_cpu) static cpu_set_t bind_to_node(int target_node) { - int cpus_per_node = g->p.nr_cpus / nr_numa_nodes(); cpu_set_t orig_mask, mask; int cpu; int ret; - BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus); -
BUG_ON(!cpus_per_node); - ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); BUG_ON(ret); @@ -310,13 +306,16 @@ static cpu_set_t bind_to_node(int target_node) for (cpu = 0; cpu < g->p.nr_cpus; cpu++) CPU_SET(cpu, &mask); } else { - int cpu_start = (target_node + 0) * cpus_per_node; - int cpu_stop = (target_node + 1) * cpus_per_node; - - BUG_ON(cpu_stop > g->p.nr_cpus); + struct bitmask *cpumask = numa_allocate_cpumask(); - for (cpu = cpu_start; cpu < cpu_stop; cpu++) - CPU_SET(cpu, &mask); + BUG_ON(!cpumask); + if (!numa_node_to_cpus(target_node, cpumask)) { + for (cpu = 0; cpu < (int)cpumask->size; cpu++) { + if (numa_bitmask_isbitset(cpumask, cpu)) + CPU_SET(cpu, &mask); + } + } + numa_free_cpumask(cpumask); } ret = sched_setaffinity(0, sizeof(mask), &mask); -- cgit v1.2.3 From a508d061ef04b163a85f55ba8a748c026f89da45 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 14 Aug 2020 18:44:54 +0800 Subject: perf bench numa: Remove dead code in parse_nodes_opt() In the function parse_nodes_opt(), the statement "return 0;" is dead code, remove it. Signed-off-by: Peng Fan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1597401894-27549-1-git-send-email-fanpeng@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/perf/bench/numa.c') diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 6d5c890478cb..f85bceccc459 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -733,8 +733,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused, return -1; return parse_node_list(arg); - - return 0; } #define BIT(x) (1ul << x) -- cgit v1.2.3