| 1 | /* Profiling of shared libraries. | 
| 2 |    Copyright (C) 1997-2019 Free Software Foundation, Inc. | 
| 3 |    This file is part of the GNU C Library. | 
| 4 |    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. | 
| 5 |    Based on the BSD mcount implementation. | 
| 6 |  | 
| 7 |    The GNU C Library is free software; you can redistribute it and/or | 
| 8 |    modify it under the terms of the GNU Lesser General Public | 
| 9 |    License as published by the Free Software Foundation; either | 
| 10 |    version 2.1 of the License, or (at your option) any later version. | 
| 11 |  | 
| 12 |    The GNU C Library is distributed in the hope that it will be useful, | 
| 13 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 14 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
| 15 |    Lesser General Public License for more details. | 
| 16 |  | 
| 17 |    You should have received a copy of the GNU Lesser General Public | 
| 18 |    License along with the GNU C Library; if not, see | 
| 19 |    <http://www.gnu.org/licenses/>.  */ | 
| 20 |  | 
| 21 | #include <assert.h> | 
| 22 | #include <errno.h> | 
| 23 | #include <fcntl.h> | 
| 24 | #include <inttypes.h> | 
| 25 | #include <limits.h> | 
| 26 | #include <stdio.h> | 
| 27 | #include <stdlib.h> | 
| 28 | #include <string.h> | 
| 29 | #include <unistd.h> | 
| 30 | #include <stdint.h> | 
| 31 | #include <ldsodefs.h> | 
| 32 | #include <sys/gmon.h> | 
| 33 | #include <sys/gmon_out.h> | 
| 34 | #include <sys/mman.h> | 
| 35 | #include <sys/param.h> | 
| 36 | #include <sys/stat.h> | 
| 37 | #include <atomic.h> | 
| 38 | #include <not-cancel.h> | 
| 39 |  | 
/* The LD_PROFILE feature has to be implemented differently from the
   normal profiling using the gmon/ functions.  The problem is that an
   arbitrary number of processes can simultaneously be run using
   profiling and all write the results in the same file.  To provide
   this mechanism one could implement a complicated mechanism to merge
   the content of two profiling runs or one could extend the file
   format to allow more than one data set.  For the second solution we
   would have the problem that the file can grow in size beyond any
   limit and both solutions have the problem that the concurrency of
   writing the results is a big problem.
| 50 |  | 
   Another much simpler method is to use mmap to map the same file in
   all using programs and modify the data in the mmap'ed area and so
   also automatically on the disk.  Using the MAP_SHARED option of
   mmap(2) this can be done without big problems in more than one
   process.
| 56 |  | 
| 57 |    This approach is very different from the normal profiling.  We have | 
| 58 |    to use the profiling data in exactly the way they are expected to | 
| 59 |    be written to disk.  But the normal format used by gprof is not usable | 
| 60 |    to do this.  It is optimized for size.  It writes the tags as single | 
| 61 |    bytes but this means that the following 32/64 bit values are | 
| 62 |    unaligned. | 
| 63 |  | 
| 64 |    Therefore we use a new format.  This will look like this | 
| 65 |  | 
					0  1  2  3	<- byte in 32 bit word
| 67 | 	0000				g  m  o  n | 
| 68 | 	0004				*version*	<- GMON_SHOBJ_VERSION | 
| 69 | 	0008				00 00 00 00 | 
| 70 | 	000c				00 00 00 00 | 
| 71 | 	0010				00 00 00 00 | 
| 72 |  | 
| 73 | 	0014				*tag*		<- GMON_TAG_TIME_HIST | 
| 74 | 	0018				?? ?? ?? ?? | 
| 75 | 					?? ?? ?? ??	<- 32/64 bit LowPC | 
| 76 | 	0018+A				?? ?? ?? ?? | 
| 77 | 					?? ?? ?? ??	<- 32/64 bit HighPC | 
| 78 | 	0018+2*A			*histsize* | 
| 79 | 	001c+2*A			*profrate* | 
| 80 | 	0020+2*A			s  e  c  o | 
| 81 | 	0024+2*A			n  d  s  \0 | 
| 82 | 	0028+2*A			\0 \0 \0 \0 | 
| 83 | 	002c+2*A			\0 \0 \0 | 
| 84 | 	002f+2*A			s | 
| 85 |  | 
| 86 | 	0030+2*A			?? ?? ?? ??	<- Count data | 
| 87 | 	...				... | 
| 88 | 	0030+2*A+K			?? ?? ?? ?? | 
| 89 |  | 
| 90 | 	0030+2*A+K			*tag*		<- GMON_TAG_CG_ARC | 
| 91 | 	0034+2*A+K			*lastused* | 
| 92 | 	0038+2*A+K			?? ?? ?? ?? | 
| 93 | 					?? ?? ?? ??	<- FromPC#1 | 
| 94 | 	0038+3*A+K			?? ?? ?? ?? | 
| 95 | 					?? ?? ?? ??	<- ToPC#1 | 
| 96 | 	0038+4*A+K			?? ?? ?? ??	<- Count#1 | 
| 97 | 	...				...		   ... | 
| 98 | 	0038+(2*(CN-1)+2)*A+(CN-1)*4+K	?? ?? ?? ?? | 
| 99 | 					?? ?? ?? ??	<- FromPC#CGN | 
| 100 | 	0038+(2*(CN-1)+3)*A+(CN-1)*4+K	?? ?? ?? ?? | 
| 101 | 					?? ?? ?? ??	<- ToPC#CGN | 
| 102 | 	0038+(2*CN+2)*A+(CN-1)*4+K	?? ?? ?? ??	<- Count#CGN | 
| 103 |  | 
   We put (for now?) no basic block information in the file since this would
   introduce race conditions among all the processes which want to write them.
| 106 |  | 
| 107 |    `K' is the number of count entries which is computed as | 
| 108 |  | 
| 109 |  		textsize / HISTFRACTION | 
| 110 |  | 
   `CN' in the above table is the number of call graph arcs.  Normally,
   the table is sparse and the profiling code writes out only those
   entries which are really used in the program run.  But since we must
   not extend this table (the profiling file) we'll keep them all here.
   So CN can be computed in advance as
| 116 |  | 
| 117 | 		MINARCS <= textsize*(ARCDENSITY/100) <= MAXARCS | 
| 118 |  | 
   Now the remaining question is: how to build the data structures we can
   work with from this data.  We need the from set and must associate the
   froms with all the associated tos.  We will do this by constructing these
   data structures at the program start.  To do this we'll simply visit all
   entries in the call graph table and add them to the appropriate list.  */
| 124 |  | 
/* Defined outside this file.  _dl_start_profile stores its result as the
   prof_rate field of the histogram header written to the profile file.  */
extern int __profile_frequency (void);
libc_hidden_proto (__profile_frequency)
| 127 |  | 
/* We define a special type to address the elements of the arc table.
   This is basically the `gmon_cg_arc_record' format but it includes
   the room for the tag and it uses real types.  The structure is packed
   because an array of it is overlaid directly on the mmap'ed profile
   file.  */
struct here_cg_arc_record
  {
    /* Both addresses are stored by _dl_mcount relative to LOWPC.  FROM_PC
       is 0 when the originating address lies outside the profiled
       object.  */
    uintptr_t from_pc;
    uintptr_t self_pc;
    /* The count field is atomically incremented in _dl_mcount, which
       requires it to be properly aligned for its type, and for this
       alignment to be visible to the compiler.  The amount of data
       before an array of this structure is calculated as
       expected_size in _dl_start_profile.  Everything in that
       calculation is a multiple of 4 bytes (in the case of
       kcountsize, because it is derived from a subtraction of
       page-aligned values, and the corresponding calculation in
       __monstartup also ensures it is at least a multiple of the size
       of u_long), so all copies of this field do in fact have the
       appropriate alignment.  */
    uint32_t count __attribute__ ((aligned (__alignof__ (uint32_t))));
  } __attribute__ ((packed));
| 148 |  | 
/* Start of the arc record array inside the mmap'ed profile file.  Set by
   _dl_start_profile.  */
static struct here_cg_arc_record *data;

/* Nonzero if profiling is under way.  */
static int running;

/* This is the number of entries which have been incorporated in the toset.  */
static uint32_t narcs;
/* This is a pointer to the object representing the number of entries
   currently in the mmaped file.  The value it points to does not have to
   be the same as NARCS at every point in time.  If it is equal, all
   entries from the file are in our lists.  */
static volatile uint32_t *narcsp;
| 161 |  | 
| 162 |  | 
/* One node of the in-memory chains which index the arc records.  HERE
   points to the arc record in the mapped file.  LINK is the index in
   FROMS of the next node of the same chain; 0 terminates the chain.  */
struct here_fromstruct
  {
    struct here_cg_arc_record volatile *here;
    uint16_t link;
  };
| 168 |  | 
/* Table indexed by a hash of the (LOWPC relative) SELF_PC.  Each element
   is the index in FROMS of the first node of a chain; 0 means that the
   slot is empty.  */
static volatile uint16_t *tos;

/* Array of chain nodes.  It lives in the same allocation as TOS, directly
   behind it.  FROMLIMIT is the maximum number of arcs which are recorded,
   FROMIDX is the counter from which new indices into FROMS are handed
   out.  */
static struct here_fromstruct *froms;
static uint32_t fromlimit;
static volatile uint32_t fromidx;

/* Start address and size of the profiled text range, computed in
   _dl_start_profile.  */
static uintptr_t lowpc;
static size_t textsize;
/* Shift amount which _dl_mcount uses instead of a division to compute the
   TOS index when HASHFRACTION is a power of two.  */
static unsigned int log_hashfraction;
| 178 |  | 
| 179 |  | 
| 180 |  | 
| 181 | /* Set up profiling data to profile object desribed by MAP.  The output | 
| 182 |    file is found (or created) in OUTPUT_DIR.  */ | 
| 183 | void | 
| 184 | _dl_start_profile (void) | 
| 185 | { | 
| 186 |   char *filename; | 
| 187 |   int fd; | 
| 188 |   struct stat64 st; | 
| 189 |   const ElfW(Phdr) *ph; | 
| 190 |   ElfW(Addr) mapstart = ~((ElfW(Addr)) 0); | 
| 191 |   ElfW(Addr) mapend = 0; | 
| 192 |   char *hist, *cp; | 
| 193 |   size_t idx; | 
| 194 |   size_t tossize; | 
| 195 |   size_t fromssize; | 
| 196 |   uintptr_t highpc; | 
| 197 |   uint16_t *kcount; | 
| 198 |   size_t kcountsize; | 
| 199 |   struct gmon_hdr *addr = NULL; | 
| 200 |   off_t expected_size; | 
| 201 |   /* See profil(2) where this is described.  */ | 
| 202 |   int s_scale; | 
| 203 | #define SCALE_1_TO_1	0x10000L | 
| 204 |   const char *errstr = NULL; | 
| 205 |  | 
| 206 |   /* Compute the size of the sections which contain program code.  */ | 
| 207 |   for (ph = GL(dl_profile_map)->l_phdr; | 
| 208 |        ph < &GL(dl_profile_map)->l_phdr[GL(dl_profile_map)->l_phnum]; ++ph) | 
| 209 |     if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X)) | 
| 210 |       { | 
| 211 | 	ElfW(Addr) start = (ph->p_vaddr & ~(GLRO(dl_pagesize) - 1)); | 
| 212 | 	ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + GLRO(dl_pagesize) - 1) | 
| 213 | 			  & ~(GLRO(dl_pagesize) - 1)); | 
| 214 |  | 
| 215 | 	if (start < mapstart) | 
| 216 | 	  mapstart = start; | 
| 217 | 	if (end > mapend) | 
| 218 | 	  mapend = end; | 
| 219 |       } | 
| 220 |  | 
| 221 |   /* Now we can compute the size of the profiling data.  This is done | 
| 222 |      with the same formulars as in `monstartup' (see gmon.c).  */ | 
| 223 |   running = 0; | 
| 224 |   lowpc = ROUNDDOWN (mapstart + GL(dl_profile_map)->l_addr, | 
| 225 | 		     HISTFRACTION * sizeof (HISTCOUNTER)); | 
| 226 |   highpc = ROUNDUP (mapend + GL(dl_profile_map)->l_addr, | 
| 227 | 		    HISTFRACTION * sizeof (HISTCOUNTER)); | 
| 228 |   textsize = highpc - lowpc; | 
| 229 |   kcountsize = textsize / HISTFRACTION; | 
| 230 |   if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) | 
| 231 |     { | 
| 232 |       /* If HASHFRACTION is a power of two, mcount can use shifting | 
| 233 | 	 instead of integer division.  Precompute shift amount. | 
| 234 |  | 
| 235 | 	 This is a constant but the compiler cannot compile the | 
| 236 | 	 expression away since the __ffs implementation is not known | 
| 237 | 	 to the compiler.  Help the compiler by precomputing the | 
| 238 | 	 usual cases.  */ | 
| 239 |       assert (HASHFRACTION == 2); | 
| 240 |  | 
| 241 |       if (sizeof (*froms) == 8) | 
| 242 | 	log_hashfraction = 4; | 
| 243 |       else if (sizeof (*froms) == 16) | 
| 244 | 	log_hashfraction = 5; | 
| 245 |       else | 
| 246 | 	log_hashfraction = __ffs (HASHFRACTION * sizeof (*froms)) - 1; | 
| 247 |     } | 
| 248 |   else | 
| 249 |     log_hashfraction = -1; | 
| 250 |   tossize = textsize / HASHFRACTION; | 
| 251 |   fromlimit = textsize * ARCDENSITY / 100; | 
| 252 |   if (fromlimit < MINARCS) | 
| 253 |     fromlimit = MINARCS; | 
| 254 |   if (fromlimit > MAXARCS) | 
| 255 |     fromlimit = MAXARCS; | 
| 256 |   fromssize = fromlimit * sizeof (struct here_fromstruct); | 
| 257 |  | 
| 258 |   expected_size = (sizeof (struct gmon_hdr) | 
| 259 | 		   + 4 + sizeof (struct gmon_hist_hdr) + kcountsize | 
| 260 | 		   + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record)); | 
| 261 |  | 
| 262 |   /* Create the gmon_hdr we expect or write.  */ | 
| 263 |   struct real_gmon_hdr | 
| 264 |   { | 
| 265 |     char cookie[4]; | 
| 266 |     int32_t version; | 
| 267 |     char spare[3 * 4]; | 
| 268 |   } gmon_hdr; | 
| 269 |   if (sizeof (gmon_hdr) != sizeof (struct gmon_hdr) | 
| 270 |       || (offsetof (struct real_gmon_hdr, cookie) | 
| 271 | 	  != offsetof (struct gmon_hdr, cookie)) | 
| 272 |       || (offsetof (struct real_gmon_hdr, version) | 
| 273 | 	  != offsetof (struct gmon_hdr, version))) | 
| 274 |     abort (); | 
| 275 |  | 
| 276 |   memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie)); | 
| 277 |   gmon_hdr.version = GMON_SHOBJ_VERSION; | 
| 278 |   memset (gmon_hdr.spare, '\0', sizeof (gmon_hdr.spare)); | 
| 279 |  | 
| 280 |   /* Create the hist_hdr we expect or write.  */ | 
| 281 |   struct real_gmon_hist_hdr | 
| 282 |   { | 
| 283 |     char *low_pc; | 
| 284 |     char *high_pc; | 
| 285 |     int32_t hist_size; | 
| 286 |     int32_t prof_rate; | 
| 287 |     char dimen[15]; | 
| 288 |     char dimen_abbrev; | 
| 289 |   } hist_hdr; | 
| 290 |   if (sizeof (hist_hdr) != sizeof (struct gmon_hist_hdr) | 
| 291 |       || (offsetof (struct real_gmon_hist_hdr, low_pc) | 
| 292 | 	  != offsetof (struct gmon_hist_hdr, low_pc)) | 
| 293 |       || (offsetof (struct real_gmon_hist_hdr, high_pc) | 
| 294 | 	  != offsetof (struct gmon_hist_hdr, high_pc)) | 
| 295 |       || (offsetof (struct real_gmon_hist_hdr, hist_size) | 
| 296 | 	  != offsetof (struct gmon_hist_hdr, hist_size)) | 
| 297 |       || (offsetof (struct real_gmon_hist_hdr, prof_rate) | 
| 298 | 	  != offsetof (struct gmon_hist_hdr, prof_rate)) | 
| 299 |       || (offsetof (struct real_gmon_hist_hdr, dimen) | 
| 300 | 	  != offsetof (struct gmon_hist_hdr, dimen)) | 
| 301 |       || (offsetof (struct real_gmon_hist_hdr, dimen_abbrev) | 
| 302 | 	  != offsetof (struct gmon_hist_hdr, dimen_abbrev))) | 
| 303 |     abort (); | 
| 304 |  | 
| 305 |   hist_hdr.low_pc = (char *) mapstart; | 
| 306 |   hist_hdr.high_pc = (char *) mapend; | 
| 307 |   hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER); | 
| 308 |   hist_hdr.prof_rate = __profile_frequency (); | 
| 309 |   if (sizeof (hist_hdr.dimen) >= sizeof ("seconds" )) | 
| 310 |     { | 
| 311 |       memcpy (hist_hdr.dimen, "seconds" , sizeof ("seconds" )); | 
| 312 |       memset (hist_hdr.dimen + sizeof ("seconds" ), '\0', | 
| 313 | 	      sizeof (hist_hdr.dimen) - sizeof ("seconds" )); | 
| 314 |     } | 
| 315 |   else | 
| 316 |     strncpy (hist_hdr.dimen, "seconds" , sizeof (hist_hdr.dimen)); | 
| 317 |   hist_hdr.dimen_abbrev = 's'; | 
| 318 |  | 
| 319 |   /* First determine the output name.  We write in the directory | 
| 320 |      OUTPUT_DIR and the name is composed from the shared objects | 
| 321 |      soname (or the file name) and the ending ".profile".  */ | 
| 322 |   filename = (char *) alloca (strlen (GLRO(dl_profile_output)) + 1 | 
| 323 | 			      + strlen (GLRO(dl_profile)) + sizeof ".profile" ); | 
| 324 |   cp = __stpcpy (filename, GLRO(dl_profile_output)); | 
| 325 |   *cp++ = '/'; | 
| 326 |   __stpcpy (__stpcpy (cp, GLRO(dl_profile)), ".profile" ); | 
| 327 |  | 
| 328 |   fd = __open64_nocancel (filename, O_RDWR|O_CREAT|O_NOFOLLOW, DEFFILEMODE); | 
| 329 |   if (fd == -1) | 
| 330 |     { | 
| 331 |       char buf[400]; | 
| 332 |       int errnum; | 
| 333 |  | 
| 334 |       /* We cannot write the profiling data so don't do anything.  */ | 
| 335 |       errstr = "%s: cannot open file: %s\n" ; | 
| 336 |     print_error: | 
| 337 |       errnum = errno; | 
| 338 |       if (fd != -1) | 
| 339 | 	__close_nocancel (fd); | 
| 340 |       _dl_error_printf (errstr, filename, | 
| 341 | 			__strerror_r (errnum, buf, sizeof buf)); | 
| 342 |       return; | 
| 343 |     } | 
| 344 |  | 
| 345 |   if (__fxstat64 (_STAT_VER, fd, &st) < 0 || !S_ISREG (st.st_mode)) | 
| 346 |     { | 
| 347 |       /* Not stat'able or not a regular file => don't use it.  */ | 
| 348 |       errstr = "%s: cannot stat file: %s\n" ; | 
| 349 |       goto print_error; | 
| 350 |     } | 
| 351 |  | 
| 352 |   /* Test the size.  If it does not match what we expect from the size | 
| 353 |      values in the map MAP we don't use it and warn the user.  */ | 
| 354 |   if (st.st_size == 0) | 
| 355 |     { | 
| 356 |       /* We have to create the file.  */ | 
| 357 |       char buf[GLRO(dl_pagesize)]; | 
| 358 |  | 
| 359 |       memset (buf, '\0', GLRO(dl_pagesize)); | 
| 360 |  | 
| 361 |       if (__lseek (fd, expected_size & ~(GLRO(dl_pagesize) - 1), SEEK_SET) == -1) | 
| 362 | 	{ | 
| 363 | 	cannot_create: | 
| 364 | 	  errstr = "%s: cannot create file: %s\n" ; | 
| 365 | 	  goto print_error; | 
| 366 | 	} | 
| 367 |  | 
| 368 |       if (TEMP_FAILURE_RETRY | 
| 369 | 	  (__write_nocancel (fd, buf, (expected_size & (GLRO(dl_pagesize) - 1)))) | 
| 370 | 	  < 0) | 
| 371 | 	goto cannot_create; | 
| 372 |     } | 
| 373 |   else if (st.st_size != expected_size) | 
| 374 |     { | 
| 375 |       __close_nocancel (fd); | 
| 376 |     wrong_format: | 
| 377 |  | 
| 378 |       if (addr != NULL) | 
| 379 | 	__munmap ((void *) addr, expected_size); | 
| 380 |  | 
| 381 |       _dl_error_printf ("%s: file is no correct profile data file for `%s'\n" , | 
| 382 | 			filename, GLRO(dl_profile)); | 
| 383 |       return; | 
| 384 |     } | 
| 385 |  | 
| 386 |   addr = (struct gmon_hdr *) __mmap (NULL, expected_size, PROT_READ|PROT_WRITE, | 
| 387 | 				     MAP_SHARED|MAP_FILE, fd, 0); | 
| 388 |   if (addr == (struct gmon_hdr *) MAP_FAILED) | 
| 389 |     { | 
| 390 |       errstr = "%s: cannot map file: %s\n" ; | 
| 391 |       goto print_error; | 
| 392 |     } | 
| 393 |  | 
| 394 |   /* We don't need the file descriptor anymore.  */ | 
| 395 |   __close_nocancel (fd); | 
| 396 |  | 
| 397 |   /* Pointer to data after the header.  */ | 
| 398 |   hist = (char *) (addr + 1); | 
| 399 |   kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t) | 
| 400 | 			 + sizeof (struct gmon_hist_hdr)); | 
| 401 |  | 
| 402 |   /* Compute pointer to array of the arc information.  */ | 
| 403 |   narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t)); | 
| 404 |   data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t)); | 
| 405 |  | 
| 406 |   if (st.st_size == 0) | 
| 407 |     { | 
| 408 |       /* Create the signature.  */ | 
| 409 |       memcpy (addr, &gmon_hdr, sizeof (struct gmon_hdr)); | 
| 410 |  | 
| 411 |       *(uint32_t *) hist = GMON_TAG_TIME_HIST; | 
| 412 |       memcpy (hist + sizeof (uint32_t), &hist_hdr, | 
| 413 | 	      sizeof (struct gmon_hist_hdr)); | 
| 414 |  | 
| 415 |       narcsp[-1] = GMON_TAG_CG_ARC; | 
| 416 |     } | 
| 417 |   else | 
| 418 |     { | 
| 419 |       /* Test the signature in the file.  */ | 
| 420 |       if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0 | 
| 421 | 	  || *(uint32_t *) hist != GMON_TAG_TIME_HIST | 
| 422 | 	  || memcmp (hist + sizeof (uint32_t), &hist_hdr, | 
| 423 | 		     sizeof (struct gmon_hist_hdr)) != 0 | 
| 424 | 	  || narcsp[-1] != GMON_TAG_CG_ARC) | 
| 425 | 	goto wrong_format; | 
| 426 |     } | 
| 427 |  | 
| 428 |   /* Allocate memory for the froms data and the pointer to the tos records.  */ | 
| 429 |   tos = (uint16_t *) calloc (tossize + fromssize, 1); | 
| 430 |   if (tos == NULL) | 
| 431 |     { | 
| 432 |       __munmap ((void *) addr, expected_size); | 
| 433 |       _dl_fatal_printf ("Out of memory while initializing profiler\n" ); | 
| 434 |       /* NOTREACHED */ | 
| 435 |     } | 
| 436 |  | 
| 437 |   froms = (struct here_fromstruct *) ((char *) tos + tossize); | 
| 438 |   fromidx = 0; | 
| 439 |  | 
| 440 |   /* Now we have to process all the arc count entries.  BTW: it is | 
| 441 |      not critical whether the *NARCSP value changes meanwhile.  Before | 
| 442 |      we enter a new entry in to toset we will check that everything is | 
| 443 |      available in TOS.  This happens in _dl_mcount. | 
| 444 |  | 
| 445 |      Loading the entries in reverse order should help to get the most | 
| 446 |      frequently used entries at the front of the list.  */ | 
| 447 |   for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; ) | 
| 448 |     { | 
| 449 |       size_t to_index; | 
| 450 |       size_t newfromidx; | 
| 451 |       --idx; | 
| 452 |       to_index = (data[idx].self_pc / (HASHFRACTION * sizeof (*tos))); | 
| 453 |       newfromidx = fromidx++; | 
| 454 |       froms[newfromidx].here = &data[idx]; | 
| 455 |       froms[newfromidx].link = tos[to_index]; | 
| 456 |       tos[to_index] = newfromidx; | 
| 457 |     } | 
| 458 |  | 
| 459 |   /* Setup counting data.  */ | 
| 460 |   if (kcountsize < highpc - lowpc) | 
| 461 |     { | 
| 462 | #if 0 | 
| 463 |       s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1; | 
| 464 | #else | 
| 465 |       size_t range = highpc - lowpc; | 
| 466 |       size_t quot = range / kcountsize; | 
| 467 |  | 
| 468 |       if (quot >= SCALE_1_TO_1) | 
| 469 | 	s_scale = 1; | 
| 470 |       else if (quot >= SCALE_1_TO_1 / 256) | 
| 471 | 	s_scale = SCALE_1_TO_1 / quot; | 
| 472 |       else if (range > ULONG_MAX / 256) | 
| 473 | 	s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256)); | 
| 474 |       else | 
| 475 | 	s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize); | 
| 476 | #endif | 
| 477 |     } | 
| 478 |   else | 
| 479 |     s_scale = SCALE_1_TO_1; | 
| 480 |  | 
| 481 |   /* Start the profiler.  */ | 
| 482 |   __profil ((void *) kcount, kcountsize, lowpc, s_scale); | 
| 483 |  | 
| 484 |   /* Turn on profiling.  */ | 
| 485 |   running = 1; | 
| 486 | } | 
| 487 |  | 
| 488 |  | 
/* Record one traversal of the call arc FROMPC -> SELFPC in the shared
   profile file.  Does nothing unless _dl_start_profile has turned
   profiling on, or if SELFPC lies outside the profiled text range.

   The arc is looked up in the in-memory chains (TOS/FROMS).  If it is
   not found, arcs which other users of the file have added in the
   meantime are imported first.  If it is still missing, a new arc record
   is claimed in the file, unless the table is already full, in which
   case the call is not counted.  Finally the count of the arc is
   incremented atomically.  */
void
_dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
{
  volatile uint16_t *topcindex;
  size_t i, fromindex;
  struct here_fromstruct *fromp;

  if (! running)
    return;

  /* Compute relative addresses.  The shared object can be loaded at
     any address.  The value of frompc could be anything.  We cannot
     restrict it in any way, just set to a fixed value (0) in case it
     is outside the allowed range.  These calls show up as calls from
     <external> in the gprof output.  */
  frompc -= lowpc;
  if (frompc >= textsize)
    frompc = 0;
  selfpc -= lowpc;
  if (selfpc >= textsize)
    goto done;

  /* Getting here we now have to find out whether the location was
     already used.  If yes we are lucky and only have to increment a
     counter (this also has to be atomic).  If the entry is new things
     are getting complicated...  */

  /* Avoid integer divide if possible.
     NOTE(review): the shift is only equivalent to the division in the
     other branch (and to the index computation used when importing arcs
     below) if LOG_HASHFRACTION equals log2 (HASHFRACTION * sizeof (*tos))
     -- confirm against the value set up in _dl_start_profile.  */
  if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
    i = selfpc >> log_hashfraction;
  else
    i = selfpc / (HASHFRACTION * sizeof (*tos));

  /* TOPCINDEX always points to the place where the index of a new node
     has to be stored: first the TOS slot itself, later the LINK member
     of the last node of the chain.  */
  topcindex = &tos[i];
  fromindex = *topcindex;

  /* An index of zero means the slot is empty.  */
  if (fromindex == 0)
    goto check_new_or_add;

  fromp = &froms[fromindex];

  /* We have to look through the chain of arcs whether there is already
     an entry for our arc.  Only FROM_PC is compared; the chain was
     selected through the hash of SELFPC.  */
  while (fromp->here->from_pc != frompc)
    {
      /* Walk to the matching node or to the end of the chain.  */
      if (fromp->link != 0)
	do
	  fromp = &froms[fromp->link];
	while (fromp->link != 0 && fromp->here->from_pc != frompc);

      if (fromp->here->from_pc != frompc)
	{
	  /* End of the chain reached without a match; a new node would
	     be appended to this node.  */
	  topcindex = &fromp->link;

	check_new_or_add:
	  /* Our entry is not among the entries we read so far from the
	     data file.  Now see whether we have to update the list.  */
	  while (narcs != *narcsp && narcs < fromlimit)
	    {
	      size_t to_index;
	      size_t newfromidx;
	      to_index = (data[narcs].self_pc
			  / (HASHFRACTION * sizeof (*tos)));
	      /* The new node gets the incremented value of FROMIDX, so
		 index 0 is never handed out here.  The node is pushed
		 at the front of its chain.  */
	      newfromidx = catomic_exchange_and_add (&fromidx, 1) + 1;
	      froms[newfromidx].here = &data[narcs];
	      froms[newfromidx].link = tos[to_index];
	      tos[to_index] = newfromidx;
	      catomic_increment (&narcs);
	    }

	  /* If we still have no entry stop searching and insert.  */
	  if (*topcindex == 0)
	    {
	      /* Claim the next free arc record in the file.  */
	      uint_fast32_t newarc = catomic_exchange_and_add (narcsp, 1);

	      /* In rare cases it could happen that all entries in FROMS are
		 occupied.  So we cannot count this anymore.  */
	      if (newarc >= fromlimit)
		goto done;

	      *topcindex = catomic_exchange_and_add (&fromidx, 1) + 1;
	      fromp = &froms[*topcindex];

	      fromp->here = &data[newarc];
	      data[newarc].from_pc = frompc;
	      data[newarc].self_pc = selfpc;
	      data[newarc].count = 0;
	      fromp->link = 0;
	      catomic_increment (&narcs);

	      break;
	    }

	  /* The import above attached something to our slot; continue the
	     search with that node.  */
	  fromp = &froms[*topcindex];
	}
      else
	/* Found in.  */
	break;
    }

  /* Increment the counter.  */
  catomic_increment (&fromp->here->count);

 done:
  ;
}
rtld_hidden_def (_dl_mcount)
| 596 |  |