Lines Matching +full:1 +full:p
41 # define RUSAGE_THREAD 1
47 #define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
53 #define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
159 struct params p; member
170 OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"),
174 OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"),
230 for (i = 0; i < g->p.nr_nodes; i++) { in nr_numa_nodes()
279 if (target_cpu == -1) { in bind_to_cpu()
282 for (cpu = 0; cpu < g->p.nr_cpus; cpu++) in bind_to_cpu()
285 BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus); in bind_to_cpu()
307 for (cpu = 0; cpu < g->p.nr_cpus; cpu++) in bind_to_node()
340 ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1); in mempol_restore()
353 BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); in bind_to_memnode()
354 nodemask = 1L << node; in bind_to_memnode()
393 buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0); in alloc_data()
394 BUG_ON(buf == (void *)-1); in alloc_data()
400 g->print_once = 1; in alloc_data()
407 g->print_once = 1; in alloc_data()
428 buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1)); in alloc_data()
455 return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random); in zalloc_shared_data()
463 return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); in setup_shared_data()
472 return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); in setup_private_data()
514 if (!g->p.cpu_list_str) in parse_setup_cpu_list()
517 dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); in parse_setup_cpu_list()
519 str0 = str = strdup(g->p.cpu_list_str); in parse_setup_cpu_list()
547 bind_cpu_1 = atol(tok_end + 1); in parse_setup_cpu_list()
550 step = 1; in parse_setup_cpu_list()
553 step = atol(tok_step + 1); in parse_setup_cpu_list()
554 BUG_ON(step <= 0 || step >= g->p.nr_cpus); in parse_setup_cpu_list()
562 bind_len = 1; in parse_setup_cpu_list()
565 bind_len = atol(tok_len + 1); in parse_setup_cpu_list()
566 BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus); in parse_setup_cpu_list()
570 mul = 1; in parse_setup_cpu_list()
573 mul = atol(tok_mul + 1); in parse_setup_cpu_list()
579 if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) { in parse_setup_cpu_list()
580 printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus); in parse_setup_cpu_list()
581 return -1; in parse_setup_cpu_list()
593 if (t >= g->p.nr_tasks) { in parse_setup_cpu_list()
601 if (bind_len > 1) { in parse_setup_cpu_list()
609 BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); in parse_setup_cpu_list()
620 if (t < g->p.nr_tasks) in parse_setup_cpu_list()
621 printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); in parse_setup_cpu_list()
631 return -1; in parse_cpus_opt()
651 if (!g->p.node_list_str) in parse_setup_node_list()
654 dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); in parse_setup_node_list()
656 str0 = str = strdup(g->p.node_list_str); in parse_setup_node_list()
683 bind_node_1 = atol(tok_end + 1); in parse_setup_node_list()
686 step = 1; in parse_setup_node_list()
689 step = atol(tok_step + 1); in parse_setup_node_list()
690 BUG_ON(step <= 0 || step >= g->p.nr_nodes); in parse_setup_node_list()
694 mul = 1; in parse_setup_node_list()
697 mul = atol(tok_mul + 1); in parse_setup_node_list()
703 if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) { in parse_setup_node_list()
704 printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes); in parse_setup_node_list()
705 return -1; in parse_setup_node_list()
715 if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) { in parse_setup_node_list()
735 if (t < g->p.nr_tasks) in parse_setup_node_list()
736 printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t); in parse_setup_node_list()
746 return -1; in parse_nodes_opt()
751 #define BIT(x) (1ul << x)
755 const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31); in lfsr_32()
756 return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps); in lfsr_32()
767 if (g->p.data_reads) in access_data()
769 if (g->p.data_writes) in access_data()
770 *data = val + 1; in access_data()
798 if (g->p.data_zero_memset && !g->p.data_rand_walk) { in do_work()
805 chunk_1 = words/g->p.nr_loops; in do_work()
811 if (g->p.data_rand_walk) { in do_work()
821 end = min(start + 1024, words-1); in do_work()
823 if (g->p.data_zero_memset) { in do_work()
830 } else if (!g->p.data_backwards || (nr + loop) & 1) { in do_work()
834 d = data + off + 1; in do_work()
851 d = data + off - 1; in do_work()
856 d = data + words-1; in do_work()
885 * A count of 1 means that the process is compressed
886 * to a single node. A count of g->p.nr_nodes means it's
895 for (t = 0; t < g->p.nr_threads; t++) { in count_process_nodes()
900 task_nr = process_nr*g->p.nr_threads + t; in count_process_nodes()
904 if (node < 0) /* curr_cpu was likely still -1 */ in count_process_nodes()
907 node_present[node] = 1; in count_process_nodes()
921 * A count of 1 means that the node contains only a single
928 int t, p; in count_node_processes() local
930 for (p = 0; p < g->p.nr_proc; p++) { in count_node_processes()
931 for (t = 0; t < g->p.nr_threads; t++) { in count_node_processes()
936 task_nr = p*g->p.nr_threads + t; in count_node_processes()
953 int p; in calc_convergence_compression() local
955 nodes_min = -1; in calc_convergence_compression()
958 for (p = 0; p < g->p.nr_proc; p++) { in calc_convergence_compression()
959 unsigned int nodes = count_process_nodes(p); in calc_convergence_compression()
971 if (nodes_min == 1 && nodes_max == 1) { in calc_convergence_compression()
972 *strong = 1; in calc_convergence_compression()
994 if (!g->p.show_convergence && !g->p.measure_convergence) in calc_convergence()
997 for (node = 0; node < g->p.nr_nodes; node++) in calc_convergence()
1000 loops_done_min = -1; in calc_convergence()
1003 for (t = 0; t < g->p.nr_tasks; t++) { in calc_convergence()
1023 nr_min = g->p.nr_tasks; in calc_convergence()
1026 for (node = 0; node < g->p.nr_nodes; node++) { in calc_convergence()
1036 BUG_ON(sum > g->p.nr_tasks); in calc_convergence()
1038 if (0 && (sum < g->p.nr_tasks)) in calc_convergence()
1044 * to g->p.nr_proc: in calc_convergence()
1048 for (node = 0; node < g->p.nr_nodes; node++) { in calc_convergence()
1075 if (strong && process_groups == g->p.nr_proc) { in calc_convergence()
1079 if (g->p.measure_convergence) { in calc_convergence()
1095 tprintf("\r # %5.1f%% [%.1f mins]", in show_summary()
1096 (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0); in show_summary()
1100 if (g->p.show_details >= 0) in show_summary()
1112 int details = g->p.show_details; in worker_thread()
1132 thread_data = setup_private_data(g->p.bytes_thread); in worker_thread()
1137 if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1) in worker_thread()
1138 last_task = 1; in worker_thread()
1142 first_task = 1; in worker_thread()
1145 printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n", in worker_thread()
1149 if (g->p.serialize_startup) { in worker_thread()
1153 if (g->nr_tasks_started == g->p.nr_tasks) in worker_thread()
1173 for (l = 0; l < g->p.nr_loops; l++) { in worker_thread()
1179 val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val); in worker_thread()
1180 val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val); in worker_thread()
1181 val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val); in worker_thread()
1183 if (g->p.sleep_usecs) { in worker_thread()
1185 usleep(g->p.sleep_usecs); in worker_thread()
1191 if (g->p.bytes_process_locked) { in worker_thread()
1193 val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val); in worker_thread()
1197 work_done = g->p.bytes_global + g->p.bytes_process + in worker_thread()
1198 g->p.bytes_process_locked + g->p.bytes_thread; in worker_thread()
1203 if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs) in worker_thread()
1211 if (g->p.nr_secs) { in worker_thread()
1213 if ((u32)diff.tv_sec >= g->p.nr_secs) { in worker_thread()
1224 * Perturb the first task's equilibrium every g->p.perturb_secs seconds, in worker_thread()
1227 …if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs)… in worker_thread()
1240 if (this_cpu < g->p.nr_cpus/2) in worker_thread()
1241 target_cpu = g->p.nr_cpus-1; in worker_thread()
1248 if (details >= 1) in worker_thread()
1280 td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0; in worker_thread()
1288 free_data(thread_data, g->p.bytes_thread); in worker_thread()
1317 task_nr = process_nr*g->p.nr_threads; in worker_process()
1323 pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t)); in worker_process()
1324 process_data = setup_private_data(g->p.bytes_process); in worker_process()
1326 if (g->p.show_details >= 3) { in worker_process()
1327 printf(" # process %2d global mem: %p, process mem: %p\n", in worker_process()
1331 for (t = 0; t < g->p.nr_threads; t++) { in worker_process()
1332 task_nr = process_nr*g->p.nr_threads + t; in worker_process()
1340 td->curr_cpu = -1; in worker_process()
1347 for (t = 0; t < g->p.nr_threads; t++) { in worker_process()
1352 free_data(process_data, g->p.bytes_process); in worker_process()
1358 if (g->p.show_details < 0) in print_summary()
1363 g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus); in print_summary()
1365 g->p.nr_loops, g->p.bytes_global/1024/1024); in print_summary()
1367 g->p.nr_loops, g->p.bytes_process/1024/1024); in print_summary()
1369 g->p.nr_loops, g->p.bytes_thread/1024/1024); in print_summary()
1378 ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; in init_thread_data()
1383 for (t = 0; t < g->p.nr_tasks; t++) { in init_thread_data()
1392 for (cpu = 0; cpu < g->p.nr_cpus; cpu++) in init_thread_data()
1399 ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; in deinit_thread_data()
1406 g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0); in init()
1409 g->p = p0; in init()
1411 g->p.nr_cpus = numa_num_configured_cpus(); in init()
1413 g->p.nr_nodes = numa_max_node() + 1; in init()
1416 BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0); in init()
1418 if (g->p.show_quiet && !g->p.show_details) in init()
1419 g->p.show_details = -1; in init()
1422 if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str) in init()
1423 return -1; in init()
1425 if (g->p.mb_global_str) { in init()
1426 g->p.mb_global = atof(g->p.mb_global_str); in init()
1427 BUG_ON(g->p.mb_global < 0); in init()
1430 if (g->p.mb_proc_str) { in init()
1431 g->p.mb_proc = atof(g->p.mb_proc_str); in init()
1432 BUG_ON(g->p.mb_proc < 0); in init()
1435 if (g->p.mb_proc_locked_str) { in init()
1436 g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str); in init()
1437 BUG_ON(g->p.mb_proc_locked < 0); in init()
1438 BUG_ON(g->p.mb_proc_locked > g->p.mb_proc); in init()
1441 if (g->p.mb_thread_str) { in init()
1442 g->p.mb_thread = atof(g->p.mb_thread_str); in init()
1443 BUG_ON(g->p.mb_thread < 0); in init()
1446 BUG_ON(g->p.nr_threads <= 0); in init()
1447 BUG_ON(g->p.nr_proc <= 0); in init()
1449 g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads; in init()
1451 g->p.bytes_global = g->p.mb_global *1024L*1024L; in init()
1452 g->p.bytes_process = g->p.mb_proc *1024L*1024L; in init()
1453 g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L; in init()
1454 g->p.bytes_thread = g->p.mb_thread *1024L*1024L; in init()
1456 g->data = setup_shared_data(g->p.bytes_global); in init()
1469 return -1; in init()
1479 free_data(g->data, g->p.bytes_global); in deinit()
1497 if (!g->p.show_quiet) in print_res()
1514 int i, t, p; in __bench_numa() local
1517 return -1; in __bench_numa()
1519 pids = zalloc(g->p.nr_proc * sizeof(*pids)); in __bench_numa()
1520 pid = -1; in __bench_numa()
1522 if (g->p.serialize_startup) { in __bench_numa()
1529 for (i = 0; i < g->p.nr_proc; i++) { in __bench_numa()
1544 if (g->p.serialize_startup) { in __bench_numa()
1553 while (g->nr_tasks_started != g->p.nr_tasks) in __bench_numa()
1561 threads_ready = (g->nr_tasks_working == g->p.nr_tasks); in __bench_numa()
1564 usleep(1); in __bench_numa()
1591 for (i = 0; i < g->p.nr_proc; i++) { in __bench_numa()
1599 runtime_ns_min = -1LL; in __bench_numa()
1601 for (t = 0; t < g->p.nr_tasks; t++) { in __bench_numa()
1623 runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC; in __bench_numa()
1625 if (g->p.measure_convergence) { in __bench_numa()
1643 print_res(name, bytes / g->p.nr_tasks / 1e9, in __bench_numa()
1646 print_res(name, bytes / 1e9, in __bench_numa()
1649 print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks), in __bench_numa()
1652 print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max, in __bench_numa()
1655 print_res(name, bytes / runtime_sec_max / 1e9, in __bench_numa()
1658 if (g->p.show_details >= 2) { in __bench_numa()
1659 char tname[14 + 2 * 11 + 1]; in __bench_numa()
1661 for (p = 0; p < g->p.nr_proc; p++) { in __bench_numa()
1662 for (t = 0; t < g->p.nr_threads; t++) { in __bench_numa()
1664 td = g->threads + p*g->p.nr_threads + t; in __bench_numa()
1665 snprintf(tname, sizeof(tname), "process%d:thread%d", p, t); in __bench_numa()
1699 static void init_params(struct params *p, const char *name, int argc, const char **argv) in init_params() argument
1710 memset(p, 0, sizeof(*p)); in init_params()
1714 p->serialize_startup = 1; in init_params()
1715 p->data_reads = true; in init_params()
1716 p->data_writes = true; in init_params()
1717 p->data_backwards = true; in init_params()
1718 p->data_rand_walk = true; in init_params()
1719 p->nr_loops = -1; in init_params()
1720 p->init_random = true; in init_params()
1721 p->mb_global_str = "1"; in init_params()
1722 p->nr_proc = 1; in init_params()
1723 p->nr_threads = 1; in init_params()
1724 p->nr_secs = 5; in init_params()
1725 p->run_all = argc == 1; in init_params()
1743 return -1; in run_bench_numa()
1746 #define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk"
1747 #define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1"
1749 #define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1"
1750 #define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1"
1752 #define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1"
1753 #define OPT_BW_NOTHP OPT_BW, "--thp", "-1"
1762 { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
1765 "mem", "-p", "1", "-t", "1", "-P", "1024",
1767 { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
1768 "-C" , "0", "-M", "1", OPT_BW_RAM },
1771 { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
1773 { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
1774 "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
1777 { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024",
1778 "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
1781 { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
1782 { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
1783 { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
1784 { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV },
1785 { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
1786 { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
1788 "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
1789 { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV },
1790 { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV },
1791 { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV },
1793 "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
1794 { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV },
1795 { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV },
1796 { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV },
1797 { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV },
1798 { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV },
1801 { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW },
1802 { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW },
1803 { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW },
1804 { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW },
1806 "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
1807 { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
1809 { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
1810 { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
1811 { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
1812 { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
1814 { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
1815 { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
1816 { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
1817 { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
1819 "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
1820 { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
1821 { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
1823 { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
1824 { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
1826 { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
1827 { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
1828 { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
1830 "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP },
1843 run_bench_numa(tests[i][0], tests[i] + 1); in bench_all()
1868 return -1; in bench_numa()