/* Profile heap and stack memory usage of running program.
   Copyright (C) 1998-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <dlfcn.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <libintl.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <unistd.h>
#include <unistd_ext.h>

#include <hp-timing.h>
#include <machine-sp.h>
#include <stackinfo.h>  /* For _STACK_GROWS_UP */

/* Pointers to the real functions.  These are determined using `dlsym'
   when really needed.  */
static void *(*mallocp) (size_t);
static void *(*reallocp) (void *, size_t);
static void *(*callocp) (size_t, size_t);
static void (*freep) (void *);

static void *(*mmapp) (void *, size_t, int, int, int, off_t);
static void *(*mmap64p) (void *, size_t, int, int, int, off64_t);
static int (*munmapp) (void *, size_t);
static void *(*mremapp) (void *, size_t, size_t, int, void *);

enum
{
  idx_malloc = 0,
  idx_realloc,
  idx_calloc,
  idx_free,
  idx_mmap_r,
  idx_mmap_w,
  idx_mmap_a,
  idx_mremap,
  idx_munmap,
  idx_last
};

/* Bookkeeping header prepended to every block allocated by these
   wrappers.  The user sees only the memory that follows it; `free'
   and `realloc' recognize blocks allocated here by the magic number.  */
struct header
{
  size_t length;
  size_t magic;
};

#define MAGIC 0xfeedbeaf


static _Atomic unsigned long int calls[idx_last];
static _Atomic unsigned long int failed[idx_last];
static _Atomic size_t total[idx_last];
static _Atomic size_t grand_total;
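/* Histogram of request sizes below 64 KiB, in 16-byte buckets; larger
   requests are counted in `large'.  */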
static _Atomic unsigned long int histogram[65536 / 16];
static _Atomic unsigned long int large;
static _Atomic unsigned long int calls_total;
static _Atomic unsigned long int inplace;
static _Atomic unsigned long int decreasing;
static _Atomic unsigned long int realloc_free;
static _Atomic unsigned long int inplace_mremap;
static _Atomic unsigned long int decreasing_mremap;
static _Atomic size_t current_heap;
static _Atomic size_t peak_use[3];
static __thread uintptr_t start_sp;

/* A few macros to make the source more readable.  */
#define peak_heap	peak_use[0]
#define peak_stack	peak_use[1]
#define peak_total	peak_use[2]

#define DEFAULT_BUFFER_SIZE	32768
static size_t buffer_size;

static int fd = -1;

static bool not_me;
static int initialized;
static bool trace_mmap;
extern const char *__progname;

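/* Format of one record in the output file named by MEMUSAGE_OUTPUT.
   The first two records are reserved for the start and end state (see
   `me' and `dest' below); the remaining records are samples taken at
   each traced call.  */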
struct entry
{
  uint64_t heap;
  uint64_t stack;
  uint32_t time_low;
  uint32_t time_high;
};

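/* The sample buffer is kept in two halves so that one half can be
   flushed to the output file while the other continues to fill (see
   update_data).  */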
static struct entry buffer[2 * DEFAULT_BUFFER_SIZE];
static _Atomic uint32_t buffer_cnt;
static struct entry first;

static void
gettime (struct entry *e)
{
#if HP_TIMING_INLINE
  hp_timing_t now;
  HP_TIMING_NOW (now);
  e->time_low = now & 0xffffffff;
  e->time_high = now >> 32;
#else
  struct __timespec64 now;
  uint64_t usecs;
  __clock_gettime64 (CLOCK_REALTIME, &now);
  usecs = (uint64_t) now.tv_nsec / 1000 + (uint64_t) now.tv_sec * 1000000;
  e->time_low = usecs & 0xffffffff;
  e->time_high = usecs >> 32;
#endif
}

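/* Raise *PEAK to VAL if VAL is larger.  The compare-and-swap loop
   retries until either our store succeeds or another thread has
   already recorded a value at least as large.  */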
static inline void
peak_atomic_max (_Atomic size_t *peak, size_t val)
{
  size_t v;
  do
    {
      v = atomic_load_explicit (peak, memory_order_relaxed);
      if (v >= val)
        break;
    }
  while (! atomic_compare_exchange_weak (peak, &v, val));
}

/* Update the global data after a successful function call.  */
static void
update_data (struct header *result, size_t len, size_t old_len)
{
  if (result != NULL)
    {
      /* Record the information we need and mark the block using a
         magic number.  */
      result->length = len;
      result->magic = MAGIC;
    }

  /* Compute current heap usage and compare it with the maximum value.  */
  size_t heap
    = atomic_fetch_add_explicit (&current_heap, len - old_len,
                                 memory_order_relaxed) + len - old_len;
  peak_atomic_max (&peak_heap, heap);

  /* Compute current stack usage and compare it with the maximum
     value.  The base stack pointer might not be set if this is not
     the main thread and it is the first call to any of these
     functions.  */
  if (__glibc_unlikely (!start_sp))
    start_sp = __thread_stack_pointer ();

  uintptr_t sp = __thread_stack_pointer ();
#ifdef _STACK_GROWS_UP
  /* This can happen in threads where we didn't catch the thread's
     stack early enough.  */
  if (__glibc_unlikely (sp < start_sp))
    start_sp = sp;
  size_t current_stack = sp - start_sp;
#else
  /* This can happen in threads where we didn't catch the thread's
     stack early enough.  */
  if (__glibc_unlikely (sp > start_sp))
    start_sp = sp;
  size_t current_stack = start_sp - sp;
#endif
  peak_atomic_max (&peak_stack, current_stack);

  /* Add up heap and stack usage and compare it with the maximum value.  */
  peak_atomic_max (&peak_total, heap + current_stack);

  /* Store the value only if we are writing to a file.  */
  if (fd != -1)
    {
      uint32_t idx = atomic_fetch_add_explicit (&buffer_cnt, 1,
                                                memory_order_relaxed);
      if (idx + 1 >= 2 * buffer_size)
        {
          /* We try to reset the counter to the correct range.  If
             this fails because of another thread increasing the
             counter it does not matter since that thread will take
             care of the correction.  */
          uint32_t reset = (idx + 1) % (2 * buffer_size);
          uint32_t expected = idx + 1;
          atomic_compare_exchange_weak (&buffer_cnt, &expected, reset);
          if (idx >= 2 * buffer_size)
            idx = reset - 1;
        }
      assert (idx < 2 * DEFAULT_BUFFER_SIZE);

      buffer[idx].heap = current_heap;
      buffer[idx].stack = current_stack;
      gettime (&buffer[idx]);

      /* Write out buffer if it is full.  */
      if (idx + 1 == buffer_size || idx + 1 == 2 * buffer_size)
        {
          uint32_t write_size = buffer_size * sizeof (buffer[0]);
          write_all (fd, &buffer[idx + 1 - buffer_size], write_size);
        }
    }
}


/* Interrupt handler.  */
static void
int_handler (int signo)
{
  /* Nothing gets allocated.  Just record the stack pointer position.  */
  update_data (NULL, 0, 0);
}


/* Find out whether this is the program we are supposed to profile.
   For this the name in the variable `__progname' must match the one
   given in the environment variable MEMUSAGE_PROG_NAME.  If the
   variable is not present every program assumes it should be profiled.

   If this is the program, open a file descriptor to the output file.
   We will write to it whenever the buffer overflows.  The name of the
   output file is determined by the environment variable MEMUSAGE_OUTPUT.

   If the environment variable MEMUSAGE_BUFFER_SIZE is set its numerical
   value determines the size of the internal buffer, i.e., the number
   of elements in it.  Setting it to one effectively selects unbuffered
   operation.

   If MEMUSAGE_NO_TIMER is not present an alarm handler is installed
   which at the highest possible frequency records the stack pointer.  */
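/* Illustrative invocation (a sketch; in glibc this file is built into
   libmemusage.so and is normally driven by the memusage(1) script):

     MEMUSAGE_OUTPUT=./prog.mem MEMUSAGE_BUFFER_SIZE=1024 \
       LD_PRELOAD=libmemusage.so ./some_program  */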
static void
me (void)
{
  const char *env = getenv ("MEMUSAGE_PROG_NAME");
  size_t prog_len = strlen (__progname);

  initialized = -1;
  mallocp = (void *(*)(size_t))dlsym (RTLD_NEXT, "malloc");
  reallocp = (void *(*)(void *, size_t))dlsym (RTLD_NEXT, "realloc");
  callocp = (void *(*)(size_t, size_t))dlsym (RTLD_NEXT, "calloc");
  freep = (void (*)(void *))dlsym (RTLD_NEXT, "free");

  mmapp = (void *(*)(void *, size_t, int, int, int, off_t))dlsym (RTLD_NEXT,
                                                                  "mmap");
  mmap64p =
    (void *(*)(void *, size_t, int, int, int, off64_t))dlsym (RTLD_NEXT,
                                                              "mmap64");
  mremapp = (void *(*)(void *, size_t, size_t, int, void *))dlsym (RTLD_NEXT,
                                                                   "mremap");
  munmapp = (int (*)(void *, size_t))dlsym (RTLD_NEXT, "munmap");
  initialized = 1;

  if (env != NULL)
    {
      /* Check for program name.  */
      size_t len = strlen (env);
      if (len > prog_len || strcmp (env, &__progname[prog_len - len]) != 0
          || (prog_len != len && __progname[prog_len - len - 1] != '/'))
        not_me = true;
    }

  /* Only open the file if it's really us.  */
  if (!not_me && fd == -1)
    {
      const char *outname;

      if (!start_sp)
        start_sp = __thread_stack_pointer ();

      outname = getenv ("MEMUSAGE_OUTPUT");
      if (outname != NULL && outname[0] != '\0'
          && (access (outname, R_OK | W_OK) == 0 || errno == ENOENT))
        {
          fd = creat64 (outname, 0666);

          if (fd == -1)
            /* Don't do anything in future calls if we cannot write to
               the output file.  */
            not_me = true;
          else
            {
              /* Write the first entry.  */
              first.heap = 0;
              first.stack = 0;
              gettime (&first);
              /* Write it two times since we need the starting and end time. */
              write_all (fd, &first, sizeof (first));
              write_all (fd, &first, sizeof (first));

              /* Determine the buffer size.  We use the default if the
                 environment variable is not present.  */
              buffer_size = DEFAULT_BUFFER_SIZE;
              const char *str_buffer_size = getenv ("MEMUSAGE_BUFFER_SIZE");
              if (str_buffer_size != NULL)
                {
                  buffer_size = atoi (str_buffer_size);
                  if (buffer_size == 0 || buffer_size > DEFAULT_BUFFER_SIZE)
                    buffer_size = DEFAULT_BUFFER_SIZE;
                }

              /* Possibly enable timer-based stack pointer retrieval.  */
              if (getenv ("MEMUSAGE_NO_TIMER") == NULL)
                {
                  struct sigaction act;

                  act.sa_handler = (sighandler_t) &int_handler;
                  act.sa_flags = SA_RESTART;
                  sigfillset (&act.sa_mask);

                  if (sigaction (SIGPROF, &act, NULL) >= 0)
                    {
                      struct itimerval timer;

                      timer.it_value.tv_sec = 0;
                      timer.it_value.tv_usec = 1;
                      timer.it_interval = timer.it_value;
                      setitimer (ITIMER_PROF, &timer, NULL);
                    }
                }
            }
        }

      if (!not_me && getenv ("MEMUSAGE_TRACE_MMAP") != NULL)
        trace_mmap = true;
    }
}


/* Record the initial stack position.  */
static void
__attribute__ ((constructor))
init (void)
{
  start_sp = __thread_stack_pointer ();
  if (!initialized)
    me ();
}


/* `malloc' replacement.  We keep track of the memory usage if this is the
   correct program.  */
void *
malloc (size_t len)
{
  struct header *result = NULL;

  /* Determine the real implementation if that has not happened yet.  */
  if (__glibc_unlikely (initialized <= 0))
    {
      if (initialized == -1)
        return NULL;

      me ();
    }

  /* If this is not the correct program just use the normal function.  */
  if (not_me)
    return (*mallocp)(len);

  /* Keep track of number of calls.  */
  atomic_fetch_add_explicit (&calls[idx_malloc], 1, memory_order_relaxed);
  /* Keep track of total memory consumption for `malloc'.  */
  atomic_fetch_add_explicit (&total[idx_malloc], len, memory_order_relaxed);
  /* Keep track of total memory requirement.  */
  atomic_fetch_add_explicit (&grand_total, len, memory_order_relaxed);
  /* Remember the size of the request.  */
  if (len < 65536)
    atomic_fetch_add_explicit (&histogram[len / 16], 1, memory_order_relaxed);
  else
    atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
  /* Total number of calls of any of the functions.  */
  atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);

  /* Do the real work.  */
  result = (struct header *) (*mallocp)(len + sizeof (struct header));
  if (result == NULL)
    {
      atomic_fetch_add_explicit (&failed[idx_malloc], 1,
                                 memory_order_relaxed);
      return NULL;
    }

  /* Update the allocation data and write out the records if necessary.  */
  update_data (result, len, 0);

  /* Return the pointer to the user buffer.  */
  return (void *) (result + 1);
}


/* `realloc' replacement.  We keep track of the memory usage if this is the
   correct program.  */
void *
realloc (void *old, size_t len)
{
  struct header *result = NULL;
  struct header *real;
  size_t old_len;

  /* Determine the real implementation if that has not happened yet.  */
  if (__glibc_unlikely (initialized <= 0))
    {
      if (initialized == -1)
        return NULL;

      me ();
    }

  /* If this is not the correct program just use the normal function.  */
  if (not_me)
    return (*reallocp)(old, len);

  if (old == NULL)
    {
      /* This is really a `malloc' call.  */
      real = NULL;
      old_len = 0;
    }
  else
    {
      real = ((struct header *) old) - 1;
      if (real->magic != MAGIC)
        /* This block was not allocated here.  */
        return (*reallocp)(old, len);

      old_len = real->length;
    }

  /* Keep track of number of calls.  */
  atomic_fetch_add_explicit (&calls[idx_realloc], 1, memory_order_relaxed);
  if (len > old_len)
    {
      /* Keep track of total memory consumption for `realloc'.  */
      atomic_fetch_add_explicit (&total[idx_realloc], len - old_len,
                                 memory_order_relaxed);
      /* Keep track of total memory requirement.  */
      atomic_fetch_add_explicit (&grand_total, len - old_len,
                                 memory_order_relaxed);
    }

  if (len == 0 && old != NULL)
    {
      /* Special case.  */
      atomic_fetch_add_explicit (&realloc_free, 1, memory_order_relaxed);
      /* Keep track of total memory freed using `free'.  */
      atomic_fetch_add_explicit (&total[idx_free], real->length,
                                 memory_order_relaxed);

      /* Update the allocation data and write out the records if necessary. */
      update_data (NULL, 0, old_len);

      /* Do the real work.  */
      (*freep) (real);

      return NULL;
    }

  /* Remember the size of the request.  */
  if (len < 65536)
    atomic_fetch_add_explicit (&histogram[len / 16], 1, memory_order_relaxed);
  else
    atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
  /* Total number of calls of any of the functions.  */
  atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);

  /* Do the real work.  */
  result = (struct header *) (*reallocp)(real, len + sizeof (struct header));
  if (result == NULL)
    {
      atomic_fetch_add_explicit (&failed[idx_realloc], 1,
                                 memory_order_relaxed);
      return NULL;
    }

  /* Record whether the reduction/increase happened in place.  */
  if (real == result)
    atomic_fetch_add_explicit (&inplace, 1, memory_order_relaxed);
  /* Was the buffer decreased?  */
  if (old_len > len)
    atomic_fetch_add_explicit (&decreasing, 1, memory_order_relaxed);

  /* Update the allocation data and write out the records if necessary.  */
  update_data (result, len, old_len);

  /* Return the pointer to the user buffer.  */
  return (void *) (result + 1);
}


/* `calloc' replacement.  We keep track of the memory usage if this is the
   correct program.  */
void *
calloc (size_t n, size_t len)
{
  struct header *result;
  size_t size = n * len;

  /* Determine the real implementation if that has not happened yet.  */
  if (__glibc_unlikely (initialized <= 0))
    {
      if (initialized == -1)
        return NULL;

      me ();
    }

  /* If this is not the correct program just use the normal function.  */
  if (not_me)
    return (*callocp)(n, len);

  /* Keep track of number of calls.  */
  atomic_fetch_add_explicit (&calls[idx_calloc], 1, memory_order_relaxed);
  /* Keep track of total memory consumption for `calloc'.  */
  atomic_fetch_add_explicit (&total[idx_calloc], size, memory_order_relaxed);
  /* Keep track of total memory requirement.  */
  atomic_fetch_add_explicit (&grand_total, size, memory_order_relaxed);
  /* Remember the size of the request.  */
  if (size < 65536)
    atomic_fetch_add_explicit (&histogram[size / 16], 1,
                               memory_order_relaxed);
  else
    atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
  /* Total number of calls of any of the functions.  */
  atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);

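  /* We allocate with malloc and clear the memory ourselves below; the
     header prepended to the block means the request cannot simply be
     forwarded to calloc.  Note that the multiplication N * LEN above
     is not checked for overflow here.  */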
  /* Do the real work.  */
  result = (struct header *) (*mallocp)(size + sizeof (struct header));
  if (result == NULL)
    {
      atomic_fetch_add_explicit (&failed[idx_calloc], 1,
                                 memory_order_relaxed);
      return NULL;
    }

  /* Update the allocation data and write out the records if necessary.  */
  update_data (result, size, 0);

  /* Do what `calloc' would have done and return the buffer to the caller.  */
  return memset (result + 1, '\0', size);
}


/* `free' replacement.  We keep track of the memory usage if this is the
   correct program.  */
void
free (void *ptr)
{
  struct header *real;

  /* Determine the real implementation if that has not happened yet.  */
  if (__glibc_unlikely (initialized <= 0))
    {
      if (initialized == -1)
        return;

      me ();
    }

  /* If this is not the correct program just use the normal function.  */
  if (not_me)
    {
      (*freep) (ptr);
      return;
    }

  /* `free (NULL)' has no effect.  */
  if (ptr == NULL)
    {
      atomic_fetch_add_explicit (&calls[idx_free], 1, memory_order_relaxed);
      return;
    }

  /* Determine the pointer to the header.  */
  real = ((struct header *) ptr) - 1;
  if (real->magic != MAGIC)
    {
      /* This block wasn't allocated here.  */
      (*freep) (ptr);
      return;
    }

  /* Keep track of number of calls.  */
  atomic_fetch_add_explicit (&calls[idx_free], 1, memory_order_relaxed);
  /* Keep track of total memory freed using `free'.  */
  atomic_fetch_add_explicit (&total[idx_free], real->length,
                             memory_order_relaxed);

  /* Update the allocation data and write out the records if necessary.  */
  update_data (NULL, 0, real->length);

  /* Do the real work.  */
  (*freep) (real);
}


/* `mmap' replacement.  We do not have to keep track of the size since
   `munmap' will get it as a parameter.  */
void *
mmap (void *start, size_t len, int prot, int flags, int fd, off_t offset)
{
  void *result = NULL;

  /* Determine the real implementation if that has not happened yet.  */
  if (__glibc_unlikely (initialized <= 0))
    {
      if (initialized == -1)
        return NULL;

      me ();
    }

  /* Always get a block.  We don't need extra memory.  */
  result = (*mmapp)(start, len, prot, flags, fd, offset);

  if (!not_me && trace_mmap)
    {
      int idx = (flags & MAP_ANON
                 ? idx_mmap_a : prot & PROT_WRITE ? idx_mmap_w : idx_mmap_r);

      /* Keep track of number of calls.  */
      atomic_fetch_add_explicit (&calls[idx], 1, memory_order_relaxed);
      /* Keep track of total memory consumption for `mmap'.  */
      atomic_fetch_add_explicit (&total[idx], len, memory_order_relaxed);
      /* Keep track of total memory requirement.  */
      atomic_fetch_add_explicit (&grand_total, len, memory_order_relaxed);
      /* Remember the size of the request.  */
      if (len < 65536)
        atomic_fetch_add_explicit (&histogram[len / 16], 1,
                                   memory_order_relaxed);
      else
        atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
      /* Total number of calls of any of the functions.  */
      atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);

      /* Check for failures.  mmap returns MAP_FAILED, not NULL, on
         error.  */
      if (result == MAP_FAILED)
        atomic_fetch_add_explicit (&failed[idx], 1, memory_order_relaxed);
      else if (idx == idx_mmap_w)
        /* Update the allocation data and write out the records if
           necessary.  Note the first parameter is NULL which means
           the size is not tracked.  */
        update_data (NULL, len, 0);
    }

  /* Return the pointer to the user buffer.  */
  return result;
}


/* `mmap64' replacement.  We do not have to keep track of the size since
   `munmap' will get it as a parameter.  */
void *
mmap64 (void *start, size_t len, int prot, int flags, int fd, off64_t offset)
{
  void *result = NULL;

  /* Determine the real implementation if that has not happened yet.  */
  if (__glibc_unlikely (initialized <= 0))
    {
      if (initialized == -1)
        return NULL;

      me ();
    }

  /* Always get a block.  We don't need extra memory.  */
  result = (*mmap64p)(start, len, prot, flags, fd, offset);

  if (!not_me && trace_mmap)
    {
      int idx = (flags & MAP_ANON
                 ? idx_mmap_a : prot & PROT_WRITE ? idx_mmap_w : idx_mmap_r);

      /* Keep track of number of calls.  */
      atomic_fetch_add_explicit (&calls[idx], 1, memory_order_relaxed);
      /* Keep track of total memory consumption for `mmap64'.  */
      atomic_fetch_add_explicit (&total[idx], len, memory_order_relaxed);
      /* Keep track of total memory requirement.  */
      atomic_fetch_add_explicit (&grand_total, len, memory_order_relaxed);
      /* Remember the size of the request.  */
      if (len < 65536)
        atomic_fetch_add_explicit (&histogram[len / 16], 1,
                                   memory_order_relaxed);
      else
        atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
      /* Total number of calls of any of the functions.  */
      atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);

      /* Check for failures.  mmap64 returns MAP_FAILED, not NULL, on
         error.  */
      if (result == MAP_FAILED)
        atomic_fetch_add_explicit (&failed[idx], 1, memory_order_relaxed);
      else if (idx == idx_mmap_w)
        /* Update the allocation data and write out the records if
           necessary.  Note the first parameter is NULL which means
           the size is not tracked.  */
        update_data (NULL, len, 0);
    }

  /* Return the pointer to the user buffer.  */
  return result;
}


/* `mremap' replacement.  We do not have to keep track of the size since
   `munmap' will get it as a parameter.  */
void *
mremap (void *start, size_t old_len, size_t len, int flags, ...)
{
  void *result = NULL;
  va_list ap;

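  /* The optional fifth argument (the new address) is present only if
     MREMAP_FIXED is set.  */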
  va_start (ap, flags);
  void *newaddr = (flags & MREMAP_FIXED) ? va_arg (ap, void *) : NULL;
  va_end (ap);

  /* Determine the real implementation if that has not happened yet.  */
  if (__glibc_unlikely (initialized <= 0))
    {
      if (initialized == -1)
        return NULL;

      me ();
    }

  /* Always get a block.  We don't need extra memory.  */
  result = (*mremapp)(start, old_len, len, flags, newaddr);

  if (!not_me && trace_mmap)
    {
      /* Keep track of number of calls.  */
      atomic_fetch_add_explicit (&calls[idx_mremap], 1, memory_order_relaxed);
      if (len > old_len)
        {
          /* Keep track of total memory consumption for `mremap'.  */
          atomic_fetch_add_explicit (&total[idx_mremap], len - old_len,
                                     memory_order_relaxed);
          /* Keep track of total memory requirement.  */
          atomic_fetch_add_explicit (&grand_total, len - old_len,
                                     memory_order_relaxed);
        }
      /* Remember the size of the request.  */
      if (len < 65536)
        atomic_fetch_add_explicit (&histogram[len / 16], 1,
                                   memory_order_relaxed);
      else
        atomic_fetch_add_explicit (&large, 1, memory_order_relaxed);
      /* Total number of calls of any of the functions.  */
      atomic_fetch_add_explicit (&calls_total, 1, memory_order_relaxed);

      /* Check for failures.  mremap returns MAP_FAILED, not NULL, on
         error.  */
      if (result == MAP_FAILED)
        atomic_fetch_add_explicit (&failed[idx_mremap], 1,
                                   memory_order_relaxed);
      else
        {
          /* Record whether the reduction/increase happened in place.  */
          if (start == result)
            atomic_fetch_add_explicit (&inplace_mremap, 1,
                                       memory_order_relaxed);
          /* Was the buffer decreased?  */
          if (old_len > len)
            atomic_fetch_add_explicit (&decreasing_mremap, 1,
                                       memory_order_relaxed);

          /* Update the allocation data and write out the records if
             necessary.  Note the first parameter is NULL which means
             the size is not tracked.  */
          update_data (NULL, len, old_len);
        }
    }

  /* Return the pointer to the user buffer.  */
  return result;
}


/* `munmap' replacement.  */
int
munmap (void *start, size_t len)
{
  int result;

  /* Determine the real implementation if that has not happened yet.  */
  if (__glibc_unlikely (initialized <= 0))
    {
      if (initialized == -1)
        return -1;

      me ();
    }

  /* Do the real work.  */
  result = (*munmapp)(start, len);

  if (!not_me && trace_mmap)
    {
      /* Keep track of number of calls.  */
      atomic_fetch_add_explicit (&calls[idx_munmap], 1, memory_order_relaxed);

      if (__glibc_likely (result == 0))
        {
          /* Keep track of total memory freed using `munmap'.  */
          atomic_fetch_add_explicit (&total[idx_munmap], len,
                                     memory_order_relaxed);

          /* Update the allocation data and write out the records if
             necessary.  */
          update_data (NULL, 0, len);
        }
      else
        atomic_fetch_add_explicit (&failed[idx_munmap], 1,
                                   memory_order_relaxed);
    }

  return result;
}


/* Write some statistics to standard error.  */
static void
__attribute__ ((destructor))
dest (void)
{
  int percent, cnt;
  unsigned long int maxcalls;

  /* If we haven't done anything here just return.  */
  if (not_me)
    return;

  /* If we should call any of the memory functions don't do any profiling.  */
  not_me = true;

  /* Finish the output file.  */
  if (fd != -1)
    {
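      /* update_data flushes the buffer in halves, so only the entries
         accumulated since the last flush remain to be written.  */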
      /* Write the partially filled buffer.  */
      struct entry *start = buffer;
      uint32_t write_cnt = buffer_cnt;

      if (buffer_cnt > buffer_size)
        {
          start = buffer + buffer_size;
          write_cnt = buffer_cnt - buffer_size;
        }

      write_all (fd, start, write_cnt * sizeof (buffer[0]));

      /* Go back to the beginning of the file.  We allocated two records
         here when we opened the file.  */
      lseek (fd, 0, SEEK_SET);
      /* Write out a record containing the total size.  */
      first.stack = peak_total;
      write_all (fd, &first, sizeof (first));
      /* Write out another record containing the maximum for heap and
         stack.  */
      first.heap = peak_heap;
      first.stack = peak_stack;
      gettime (&first);
      write_all (fd, &first, sizeof (first));

      /* Close the file.  */
      close (fd);
      fd = -1;
    }

  /* Write a colorful statistic.  */
  fprintf (stderr, "\n\
\e[01;32mMemory usage summary:\e[0;0m heap total: %llu, heap peak: %lu, stack peak: %lu\n\
\e[04;34m         total calls   total memory   failed calls\e[0m\n\
\e[00;34m malloc|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
\e[00;34mrealloc|\e[0m %10lu   %12llu   %s%12lu\e[00;00m  (nomove:%ld, dec:%ld, free:%ld)\n\
\e[00;34m calloc|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
\e[00;34m   free|\e[0m %10lu   %12llu\n",
           (unsigned long long int) grand_total, (unsigned long int) peak_heap,
           (unsigned long int) peak_stack,
           (unsigned long int) calls[idx_malloc],
           (unsigned long long int) total[idx_malloc],
           failed[idx_malloc] ? "\e[01;41m" : "",
           (unsigned long int) failed[idx_malloc],
           (unsigned long int) calls[idx_realloc],
           (unsigned long long int) total[idx_realloc],
           failed[idx_realloc] ? "\e[01;41m" : "",
           (unsigned long int) failed[idx_realloc],
           (unsigned long int) inplace,
           (unsigned long int) decreasing,
           (unsigned long int) realloc_free,
           (unsigned long int) calls[idx_calloc],
           (unsigned long long int) total[idx_calloc],
           failed[idx_calloc] ? "\e[01;41m" : "",
           (unsigned long int) failed[idx_calloc],
           (unsigned long int) calls[idx_free],
           (unsigned long long int) total[idx_free]);

  if (trace_mmap)
    fprintf (stderr, "\
\e[00;34mmmap(r)|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
\e[00;34mmmap(w)|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
\e[00;34mmmap(a)|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n\
\e[00;34m mremap|\e[0m %10lu   %12llu   %s%12lu\e[00;00m  (nomove: %ld, dec:%ld)\n\
\e[00;34m munmap|\e[0m %10lu   %12llu   %s%12lu\e[00;00m\n",
             (unsigned long int) calls[idx_mmap_r],
             (unsigned long long int) total[idx_mmap_r],
             failed[idx_mmap_r] ? "\e[01;41m" : "",
             (unsigned long int) failed[idx_mmap_r],
             (unsigned long int) calls[idx_mmap_w],
             (unsigned long long int) total[idx_mmap_w],
             failed[idx_mmap_w] ? "\e[01;41m" : "",
             (unsigned long int) failed[idx_mmap_w],
             (unsigned long int) calls[idx_mmap_a],
             (unsigned long long int) total[idx_mmap_a],
             failed[idx_mmap_a] ? "\e[01;41m" : "",
             (unsigned long int) failed[idx_mmap_a],
             (unsigned long int) calls[idx_mremap],
             (unsigned long long int) total[idx_mremap],
             failed[idx_mremap] ? "\e[01;41m" : "",
             (unsigned long int) failed[idx_mremap],
             (unsigned long int) inplace_mremap,
             (unsigned long int) decreasing_mremap,
             (unsigned long int) calls[idx_munmap],
             (unsigned long long int) total[idx_munmap],
             failed[idx_munmap] ? "\e[01;41m" : "",
             (unsigned long int) failed[idx_munmap]);

  /* Write out a histogram of the sizes of the allocations.  */
  fprintf (stderr, "\e[01;32mHistogram for block sizes:\e[0;0m\n");

  /* Determine the maximum of all calls for each size range.  */
  maxcalls = large;
  for (cnt = 0; cnt < 65536; cnt += 16)
    if (histogram[cnt / 16] > maxcalls)
      maxcalls = histogram[cnt / 16];

  for (cnt = 0; cnt < 65536; cnt += 16)
    /* Only write out the nonzero entries.  */
    if (histogram[cnt / 16] != 0)
      {
        percent = (histogram[cnt / 16] * 100) / calls_total;
        fprintf (stderr, "%5d-%-5d%12lu ", cnt, cnt + 15,
                 (unsigned long int) histogram[cnt / 16]);
        if (percent == 0)
          fputs (" <1% \e[41;37m", stderr);
        else
          fprintf (stderr, "%3d%% \e[41;37m", percent);

        /* Draw a bar with a length corresponding to the current
           percentage.  */
        percent = (histogram[cnt / 16] * 50) / maxcalls;
        while (percent-- > 0)
          fputc ('=', stderr);
        fputs ("\e[0;0m\n", stderr);
      }

  if (large != 0)
    {
      percent = (large * 100) / calls_total;
      fprintf (stderr, "   large   %12lu ", (unsigned long int) large);
      if (percent == 0)
        fputs (" <1% \e[41;37m", stderr);
      else
        fprintf (stderr, "%3d%% \e[41;37m", percent);
      percent = (large * 50) / maxcalls;
      while (percent-- > 0)
        fputc ('=', stderr);
      fputs ("\e[0;0m\n", stderr);
    }

  /* Any following malloc/free etc. calls should generate statistics again,
     because otherwise freeing something that has been malloced before
     this destructor (including struct header in front of it) wouldn't
     be properly freed.  */
  not_me = false;
}