/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

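/* VEC_SIZE, VMOV, VMOVA and VEC(i) are not defined here; the file that
   includes this one provides them and thereby selects the vector
   register set (e.g. SSE or AVX) used by this instantiation of the
   trampolines.  */
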
#undef REGISTER_SAVE_AREA_RAW
#ifdef __ILP32__
/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to
   VEC7.  */
# define REGISTER_SAVE_AREA_RAW	(8 * 7 + VEC_SIZE * 8)
#else
/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as
   BND0, BND1, BND2, BND3 and VEC0 to VEC7.  */
# define REGISTER_SAVE_AREA_RAW	(8 * 7 + 16 * 4 + VEC_SIZE * 8)
#endif

#undef REGISTER_SAVE_AREA
#undef LOCAL_STORAGE_AREA
#undef BASE
#if DL_RUNIME_RESOLVE_REALIGN_STACK
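/* The stack is realigned to VEC_SIZE before the registers are saved, so
   pad the raw size by 8 bytes to keep the save area a multiple of
   VEC_SIZE (checked below).  */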
# define REGISTER_SAVE_AREA	(REGISTER_SAVE_AREA_RAW + 8)
/* Local stack area before jumping to function address: RBX.  */
# define LOCAL_STORAGE_AREA	8
# define BASE			rbx
# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0
#  error REGISTER_SAVE_AREA must be a multiple of VEC_SIZE
# endif
#else
# define REGISTER_SAVE_AREA	REGISTER_SAVE_AREA_RAW
/* Local stack area before jumping to function address: All saved
   registers.  */
# define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
# define BASE			rsp
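/* On entry %rsp is 8 mod 16: the caller's call pushed the return address
   and the PLT pushed two more 8-byte words.  Subtracting an odd multiple
   of 8 therefore restores 16-byte alignment for the register saves.  */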
# if (REGISTER_SAVE_AREA % 16) != 8
#  error REGISTER_SAVE_AREA must be an odd multiple of 8
# endif
#endif

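/* _dl_runtime_resolve is entered on the first call through a lazily
   bound PLT slot.  The PLT has pushed the link_map pointer and the
   relocation index; we save every register that may carry arguments,
   call _dl_fixup to resolve the symbol and then tail-call the resolved
   function with the original register contents restored.  */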
	.text
	.globl _dl_runtime_resolve
	.hidden _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16)	# Incorporate PLT
#if DL_RUNIME_RESOLVE_REALIGN_STACK
# if LOCAL_STORAGE_AREA != 8
#  error LOCAL_STORAGE_AREA must be 8
# endif
	pushq %rbx			# push decrements %rsp by 8.
	cfi_adjust_cfa_offset(8)
	cfi_rel_offset(%rbx, 0)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)
	and $-VEC_SIZE, %RSP_LP
#endif
	sub $REGISTER_SAVE_AREA, %RSP_LP
	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
	# Preserve registers otherwise clobbered.
	movq %rax, REGISTER_SAVE_RAX(%rsp)
	movq %rcx, REGISTER_SAVE_RCX(%rsp)
	movq %rdx, REGISTER_SAVE_RDX(%rsp)
	movq %rsi, REGISTER_SAVE_RSI(%rsp)
	movq %rdi, REGISTER_SAVE_RDI(%rsp)
	movq %r8, REGISTER_SAVE_R8(%rsp)
	movq %r9, REGISTER_SAVE_R9(%rsp)
	VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp)
	VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp)
	VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp)
	VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp)
	VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp)
	VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp)
	VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp)
	VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp)
#ifndef __ILP32__
	# We also have to preserve bound registers.  These are nops if
	# Intel MPX isn't available or is disabled.
# ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
	bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
	bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
	bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
# else
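	# The assembler may lack MPX support, so the bndmov stores are
	# emitted as raw instruction bytes.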
#  if REGISTER_SAVE_BND0 == 0
	.byte 0x66,0x0f,0x1b,0x04,0x24
#  else
	.byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
#  endif
	.byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
	.byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
	.byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
# endif
#endif
	# Copy the args pushed by the PLT into registers.
	# %rdi: link_map, %rsi: reloc_index
	mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
	mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
	call _dl_fixup		# Call resolver.
	mov %RAX_LP, %R11_LP	# Save return value.
#ifndef __ILP32__
	# Restore bound registers.  These are nops if Intel MPX isn't
	# available or is disabled.
# ifdef HAVE_MPX_SUPPORT
	bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
	bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
	bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
	bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
# else
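	# Raw instruction bytes again: bndmov loads for assemblers
	# without MPX support.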
	.byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
	.byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
	.byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
#  if REGISTER_SAVE_BND0 == 0
	.byte 0x66,0x0f,0x1a,0x04,0x24
#  else
	.byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
#  endif
# endif
#endif
	# Get register content back.
	movq REGISTER_SAVE_R9(%rsp), %r9
	movq REGISTER_SAVE_R8(%rsp), %r8
	movq REGISTER_SAVE_RDI(%rsp), %rdi
	movq REGISTER_SAVE_RSI(%rsp), %rsi
	movq REGISTER_SAVE_RDX(%rsp), %rdx
	movq REGISTER_SAVE_RCX(%rsp), %rcx
	movq REGISTER_SAVE_RAX(%rsp), %rax
	VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0)
	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1)
	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2)
	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3)
	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4)
	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
#if DL_RUNIME_RESOLVE_REALIGN_STACK
	mov %RBX_LP, %RSP_LP
	cfi_def_cfa_register(%rsp)
	movq (%rsp), %rbx
	cfi_restore(%rbx)
#endif
	# Adjust the stack (the PLT did 2 pushes).
	add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
	cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
	# Preserve bound registers.
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve


#ifndef PROF
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be a multiple of VEC_SIZE
# endif

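/* _dl_runtime_profile is used instead of _dl_runtime_resolve when LD_AUDIT
   auditing or PLT profiling is active.  It records the incoming registers
   in a La_x86_64_regs structure, calls _dl_profile_fixup and, when a
   non-negative framesize is returned, copies the caller's stack frame,
   calls the resolved function and hands its return value to
   _dl_call_pltexit.  */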
	.globl _dl_runtime_profile
	.hidden _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
_dl_runtime_profile:
	cfi_startproc
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be VEC_SIZE-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

	sub $32, %RSP_LP	# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	and $-VEC_SIZE, %RSP_LP
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any of xmm0-xmm7 are changed by an audit
	   module.  */
	sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8, LR_R8_OFFSET(%rsp)
	movq %r9, LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	lea 48(%rbx), %RAX_LP
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, (LR_BND_OFFSET)(%rsp)		# Preserve bound
	bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp)	# registers.  Nops if
	bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
	bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
#  else
	.byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1b,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1b,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
#  endif
# endif

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
	VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

	/* Save xmm0-xmm7 registers to detect if any of them are
	   changed by an audit module.  */
	vmovdqa %xmm0, (LR_SIZE)(%rsp)
	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif

	mov %RSP_LP, %RCX_LP	# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %RDX_LP	# Load return address if needed.
	mov 40(%rbx), %RSI_LP	# Copy args pushed by the PLT into registers.
	mov 32(%rbx), %RDI_LP	# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %R8_LP	# Address of framesize
	call _dl_profile_fixup	# Call resolver.

	mov %RAX_LP, %R11_LP	# Save return value.

	movq 8(%rbx), %rax	# Get back register content.
	movq LR_RDX_OFFSET(%rsp), %rdx
	movq LR_R8_OFFSET(%rsp), %r8
	movq LR_R9_OFFSET(%rsp), %r9

	movaps (LR_XMM_OFFSET)(%rsp), %xmm0
	movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
	/* Check if any of xmm0-xmm7 were changed by the audit module.
	   If a register is unchanged, reload the full vector register
	   from the save area; if the audit module modified it, keep the
	   new value and copy it into the vector save slot instead.  */
	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
	vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
	vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
	vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
	vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
	vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
	vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
	vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
	vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov (LR_BND_OFFSET)(%rsp), %bnd0		# Restore bound
	bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1	# registers.
	bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2
	bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3
#  else
	.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
#  endif
# endif

	mov 16(%rbx), %R10_LP	# Anything in framesize?
	test %R10_LP, %R10_LP
	PRESERVE_BND_REGS_PREFIX
	jns 3f			# Jump if framesize is non-negative.

	/* The framesize is negative, so the stack frame is not copied
	   and _dl_call_pltexit will not be called.  */

	/* Get back registers content.  */
	movq LR_RCX_OFFSET(%rsp), %rcx
	movq LR_RSI_OFFSET(%rsp), %rsi
	movq LR_RDI_OFFSET(%rsp), %rdi

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return address
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	PRESERVE_BND_REGS_PREFIX
	jmp *%r11		# Jump to function address.

3:
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)

	/* At this point we need to prepare a new stack for the function
	   which has to be called.  We copy the original stack to a
	   temporary buffer of the size specified by the 'framesize'
	   returned from _dl_profile_fixup.  */

	lea LR_RSP_OFFSET(%rbx), %RSI_LP	# stack
	add $8, %R10_LP
	and $-16, %R10_LP	# Keep the copy size 16-byte aligned.
	mov %R10_LP, %RCX_LP	# Number of bytes to copy.
	sub %R10_LP, %RSP_LP	# Allocate the new stack frame.
	mov %RSP_LP, %RDI_LP	# Destination of the copy.
	shr $3, %RCX_LP		# Copy quadword by quadword.
	rep
	movsq

	movq 24(%rdi), %rcx	# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi

	PRESERVE_BND_REGS_PREFIX
	call *%r11		# Call the resolved function.

	mov 24(%rbx), %RSP_LP	# Drop the copied stack content.

	/* Now we have to prepare the La_x86_64_retval structure for
	   _dl_call_pltexit.  %rsp now points to the La_x86_64_regs
	   structure, so we just need to allocate sizeof(La_x86_64_retval)
	   bytes on the stack, since the alignment has already been taken
	   care of.  */
# ifdef RESTORE_AVX
	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
	   registers to detect if xmm0/xmm1 registers are changed
	   by an audit module.  */
	sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP
# else
	sub $LRV_SIZE, %RSP_LP	# sizeof(La_x86_64_retval)
# endif
	mov %RSP_LP, %RCX_LP	# La_x86_64_retval argument to %rcx.

	/* Fill in the La_x86_64_retval structure.  */
	movq %rax, LRV_RAX_OFFSET(%rcx)
	movq %rdx, LRV_RDX_OFFSET(%rcx)

	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
	VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

	/* Save xmm0/xmm1 registers to detect if they are changed
	   by an audit module.  */
	vmovdqa %xmm0, (LRV_SIZE)(%rcx)
	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, LRV_BND0_OFFSET(%rcx)	# Preserve returned bounds.
	bndmov %bnd1, LRV_BND1_OFFSET(%rcx)
#  else
	.byte 0x66,0x0f,0x1b,0x81;.long (LRV_BND0_OFFSET)
	.byte 0x66,0x0f,0x1b,0x89;.long (LRV_BND1_OFFSET)
#  endif
# endif

	fstpt LRV_ST0_OFFSET(%rcx)	# Preserve the x87 return values
	fstpt LRV_ST1_OFFSET(%rcx)	# (long double in %st0/%st1).

	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi	# Copy args pushed by the PLT into registers.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit	# Run the audit modules' pltexit hooks.

	/* Restore return registers.  */
	movq LRV_RAX_OFFSET(%rsp), %rax
	movq LRV_RDX_OFFSET(%rsp), %rdx

	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
	movaps LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
	/* Check if xmm0/xmm1 registers are changed by an audit module.  */
	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

# ifndef __ILP32__
#  ifdef HAVE_MPX_SUPPORT
	bndmov LRV_BND0_OFFSET(%rsp), %bnd0	# Restore bound registers.
	bndmov LRV_BND1_OFFSET(%rsp), %bnd1
#  else
	.byte 0x66,0x0f,0x1a,0x84,0x24;.long (LRV_BND0_OFFSET)
	.byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LRV_BND1_OFFSET)
#  endif
# endif

	fldt LRV_ST1_OFFSET(%rsp)	# Restore the x87 return values.
	fldt LRV_ST0_OFFSET(%rsp)

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return address
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	PRESERVE_BND_REGS_PREFIX
	retq

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif
