#define _GNU_SOURCE
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

// memrw provides a simulation of an application
// reading and writing memory, for the sake of tuning helgrind.
// It is a very simple (simplistic) model:
//  * only one thread
//  * only one exe context reading or writing the memory
//  * the working set of the application is unrealistically
//    concentrated on a consecutive nr of MB.
// At this moment, it was just used to tune the EvM data structure
// of helgrind.
// It would be nice to enhance this program to cope with a richer
// model e.g. multiple threads, many different stack traces touching
// the memory, better working set distribution, ...

static int sz_b;      // size of a block, in bytes
static int nr_b;      // total nr of blocks used by the program
static int nr_b_ws;   // nr of blocks in the program working set
static int nr_loops;  // nr of loops reading or writing the ws
static int nr_thr;    // nr of threads (hardcoded to 1 currently)
static int nr_repeat; // nr of times we will allocate, use, then free total+ws

// Note: the total nr of MB is what is explicitly allocated.
// On top of that, we have the stacks, local vars, lib vars, ...
// The working set is just the first nr_b_ws blocks of nr_b.

static int verbose = 0;        // set by -v; not consulted anywhere else yet
static unsigned char **t_b;    // Pointers to all blocks

// Thread body: sweeps the working set (the first nr_b_ws blocks of t_b)
// nr_loops times, touching every byte of every block.
//  * Offsets that are a multiple of 5 are written: the byte is bumped by
//    'differs' (0 or 1) while it is below 255, and reset to 0 otherwise.
//  * All other offsets are read: 'differs' records whether the byte is
//    different from the previously read byte, which then becomes 'prev'.
// The argument is ignored and the function always returns NULL.
static void *memrw_fn(void *v)
{
   int differs = 0;
   unsigned char prev = 0;

   (void)v;

   for (int loops = 0; loops < nr_loops; loops++) {
      // Note: in case of multiple threads, we will have
      // to add lock/unlock somewhere in the below, maybe to lock
      // the MB we are reading or writing.
      for (int m = 0; m < nr_b_ws; m++) {
         unsigned char *blk = t_b[m];

         for (int b = 0; b < sz_b; b++) {
            const int dowrite = (b % 5 == 0);

            // Do some write or read operations.
            if (dowrite) {
               if (blk[b] < 255)
                  blk[b] = (unsigned char)(blk[b] + differs);
               else
                  blk[b] = 0;
            } else {
               differs = (blk[b] != prev);
               prev = blk[b];
            }
         }
      }
   }
   return NULL;
}

// Maps a command line option to the global it configures.
// Returns NULL when the option is not recognised.
static int *option_target(const char *opt)
{
   if (strcmp(opt, "-b") == 0) return &sz_b;
   if (strcmp(opt, "-t") == 0) return &nr_b;
   if (strcmp(opt, "-w") == 0) return &nr_b_ws;
   if (strcmp(opt, "-l") == 0) return &nr_loops;
   if (strcmp(opt, "-r") == 0) return &nr_repeat;
   if (strcmp(opt, "-v") == 0) return &verbose;
   return NULL;
}

// Parses 'text' as a decimal integer in [0, INT_MAX] for option 'opt'.
// Unlike atoi, anything that is not entirely a number, is negative or
// does not fit in an int is reported on stderr and terminates the program,
// so that bogus sizes never reach the allocator.
static int parse_count(const char *opt, const char *text)
{
   char *end;
   long val;

   errno = 0;
   val = strtol(text, &end, 10);
   if (end == text || *end != '\0' || errno == ERANGE
       || val < 0 || val > INT_MAX) {
      fprintf(stderr, "invalid value '%s' for %s\n", text, opt);
      exit(EXIT_FAILURE);
   }
   return (int)val;
}

// Reports a failed pthread call and terminates. pthread functions return
// the error number instead of setting errno, so perror() cannot be used.
static void die_pthread(const char *what, int err)
{
   fprintf(stderr, "%s: %s\n", what, strerror(err));
   exit(EXIT_FAILURE);
}

// usage: memrw [-b blocksize default 1MB ]
//              [-t nr_b default 10] [-w nr_b_ws default 10]
//              [-l nr_loops_on_ws default 3]
//              [-r nr_repeat default 1]
//              [-v verbosity default 0]
// Options come as "-x value" pairs. An unrecognised option is reported and
// skipped together with the argument following it. Earlier usage notes also
// listed "-f fan_out", but no such option is parsed.
//
// For each repeat, the program allocates nr_b zeroed blocks of sz_b bytes,
// runs memrw_fn in a thread, waits for it, then frees everything.
// Returns 0 on success; exits with EXIT_FAILURE on a bad option value,
// an allocation failure or a pthread failure.
int main (int argc, char *argv[])
{
   pthread_t thr;
   int ret;

   sz_b = 1024 * 1024;
   nr_b = 10;
   nr_b_ws = 10;
   nr_loops = 3;
   nr_repeat = 1;
   verbose = 0;

   for (int a = 1; a < argc; a += 2) {
      int *target = option_target(argv[a]);

      if (target == NULL) {
         printf("unknown arg %s\n", argv[a]);
         continue;
      }
      // A trailing option has no value: argv[argc] is NULL.
      if (a + 1 >= argc) {
         fprintf(stderr, "missing value for %s\n", argv[a]);
         exit(EXIT_FAILURE);
      }
      *target = parse_count(argv[a], argv[a + 1]);
   }
   if (nr_b_ws > nr_b)
      nr_b_ws = nr_b; // to make it easy to do loops combining values

   nr_thr = 1;

   printf ("total program memory -t %llu MB"
           " working set -w %llu MB\n",
           ((unsigned long long)nr_b * sz_b)
             / (unsigned long long) (1024*1024),
           ((unsigned long long)nr_b_ws * sz_b)
             / (unsigned long long)(1024*1024));
   printf (" working set R or W -l %d times"
           " repeat the whole stuff -r %d times\n",
           nr_loops,
           nr_repeat);

   for (int r = 0; r < nr_repeat; r++) {
      printf ("creating and initialising the total program memory\n");
      // calloc checks the count * size product for overflow. At least one
      // element/byte is always requested so that NULL can only mean failure.
      t_b = calloc(nr_b > 0 ? (size_t)nr_b : 1, sizeof *t_b);
      if (t_b == NULL) {
         perror("malloc t_b");
         exit(EXIT_FAILURE);
      }
      for (int i = 0; i < nr_b; i++) {
         t_b[i] = calloc(sz_b > 0 ? (size_t)sz_b : 1, 1);
         if (t_b[i] == NULL) {
            perror("malloc t_b[i]");
            exit(EXIT_FAILURE);
         }
      }

      printf("starting thread that will read or write the working set\n");
      ret = pthread_create(&thr, NULL, memrw_fn, &nr_thr);
      if (ret != 0)
         die_pthread("pthread_create", ret); // thr is not valid: do not join

      printf("waiting for thread termination\n");
      ret = pthread_join(thr, NULL);
      if (ret != 0)
         die_pthread("pthread_join", ret); // thread may still use the blocks
      printf("thread terminated\n");

      /* Now, free the memory used, for the next repeat */
      for (int i = 0; i < nr_b; i++)
         free (t_b[i]);
      free (t_b);
      t_b = NULL;
      printf("memory freed\n");
   }

   return 0;
}