Serene Runtime 1.0.0
C runtime for the Serene programming language
Loading...
Searching...
No Matches
switch_x86_64.S
Go to the documentation of this file.
1/*
2 * Serene programming language
3 * Copyright (C) 2019-2026 Sameer Rahmani <[email protected]>
4 *
5 * This library is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library. If not, see <https://www.gnu.org/licenses/>.
17 */
18 .text
19
20/* Notes to myself:
21* Some of the instructions that I'm sure I'll forget:
22*   subq $n,%rsp     move rsp down n bytes       reserve stack space / realign
23*   addq $n,%rsp     move rsp up n bytes         release that space
24*   stmxcsr m        store MXCSR (32-bit) to m   SSE rounding/exception ctl+status
25*   ldmxcsr m        load MXCSR from m
26*   fnstcw m         store x87 control word (16-bit) to m (n = no-wait variant)
27*   fldcw m          load x87 control word from m
28*   callq *%r13      push return addr, jump *r13 indirect -> position independent
29*   ud2              undefined instruction -> #UD trap; marks "unreachable"
30*
31* Floating-point control state across a fiber switch.
32*
33* x86-64 has two FP units, each with a control register whose *mode* bits
34* govern how every subsequent FP instruction behaves:
35* - MXCSR (SSE, 32-bit): rounding mode, the six exception masks, FTZ/DAZ.
36* SSE carries all scalar float/double arithmetic on x86-64.
37* - x87 control word (16-bit): rounding mode, precision, exception masks.
38*
39* These modes are part of a fiber's state, not scratch. If a fiber selects,
40* say, round-toward-zero or unmasks divide-by-zero, is switched away from,
41* and later resumed, it must find its control words intact -- otherwise it
42* silently runs with whatever another fiber left set: a quiet numeric
43* correctness bug, no crash. So the switch saves the outgoing fiber's control
44* words and restores the incoming one's, just like the callee-saved GP regs.
45*
46* The System V AMD64 ABI requires this: the *control* bits of MXCSR and the
47* x87 control word are callee-saved (a function must preserve them), so a
48* routine that returns into a different fiber must preserve them too. The FP
49* *data* registers (xmm0-15, st0-7) and MXCSR *status* bits are caller-saved,
50* so the compiler spills them around the call and the switch leaves them be.
51*
52* System V AMD64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI
53* The bit layouts of MXCSR and the x87 control word are documented in the
54* Intel 64 and IA-32 Software Developer's Manual:
55* https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html
56*
57* Saved/loaded with stmxcsr/ldmxcsr (MXCSR) and fnstcw/fldcw (x87 control
58* word, the `n` is the no-wait store). srn_fiber_ctx_make seeds a fresh fiber
59* with the standard startup environment -- MXCSR 0x1F80 (all exceptions
60* masked, round-to-nearest, FTZ/DAZ off) and x87 control word 0x037F (all
61* masked, extended precision, round-to-nearest) -- so a new fiber begins like
62* a freshly started C program.
63*/
64
/*
 * void srn_fiber_swap(srn_fiber_ctx_t *from, srn_fiber_ctx_t *to);
 *
 * Syntax: GNU as, AT&T operand order.   ABI: System V AMD64.
 * In:     %rdi = from -- its first quadword receives the outgoing stack pointer
 *         %rsi = to   -- its first quadword supplies the incoming stack pointer
 *
 * Parks the running fiber by spilling its callee-saved state onto its own
 * stack and recording the resulting %rsp in *from, then adopts the %rsp stored
 * in *to, reloads the same kind of state from that stack and returns on it.
 *
 * Saved frame, lowest address first (the stored sp points at offset 0):
 *
 *      +0   MXCSR              (4 bytes used)
 *      +8   x87 control word   (2 bytes used)
 *     +16   r15
 *     +24   r14
 *     +32   r13
 *     +40   r12
 *     +48   rbx
 *     +56   rbp
 *     +64   return address
 *
 * srn_fiber_ctx_make fabricates exactly this frame for a fresh fiber, so the
 * layout is a contract: any change here has to be mirrored there.
 */
        .equ    SRN_FPCTL_SLOT_SIZE, 16     /* room reserved for both FP control words */
        .equ    SRN_FPCTL_X87CW_OFF, 8      /* x87 control word offset inside that slot */

        .globl  srn_fiber_swap
        .type   srn_fiber_swap, @function
srn_fiber_swap:
        /* General-purpose callee-saved registers of the outgoing fiber. The
         * restore sequence further down pops them in the opposite order. */
        push    %rbp
        push    %rbx
        push    %r12
        push    %r13
        push    %r14
        push    %r15

        /* The control bits of MXCSR and the x87 control word are callee-saved
         * as well. Carve out one slot below the registers and store both:
         * MXCSR at the base of the slot, the x87 word 8 bytes above it. */
        sub     $SRN_FPCTL_SLOT_SIZE, %rsp
        stmxcsr (%rsp)
        fnstcw  SRN_FPCTL_X87CW_OFF(%rsp)

        /* Hand-over. %rsp now addresses the base of the complete saved frame,
         * so this single value stands for the whole outgoing context. Once the
         * second move retires, every stack access hits the incoming stack. */
        mov     %rsp, (%rdi)                /* from->sp = rsp */
        mov     (%rsi), %rsp                /* rsp = to->sp   */

        /* Unwind the incoming frame bottom-up: FP control words, release the
         * slot, then the registers. A fiber that was parked here earlier gets
         * back what it saved; a fresh fiber receives the initial values that
         * srn_fiber_ctx_make placed in its frame. */
        ldmxcsr (%rsp)
        fldcw   SRN_FPCTL_X87CW_OFF(%rsp)
        add     $SRN_FPCTL_SLOT_SIZE, %rsp
        pop     %r15
        pop     %r14
        pop     %r13
        pop     %r12
        pop     %rbx
        pop     %rbp

        /* What remains on top of the stack is the return address:
         *   - for a previously parked fiber, the instruction following its own
         *     call to srn_fiber_swap, so it simply carries on;
         *   - for a fresh fiber, srn_fiber_trampoline, which
         *     srn_fiber_ctx_make stored in this slot. */
        ret
        .size   srn_fiber_swap, .-srn_fiber_swap
118
/*
 * srn_fiber_trampoline -- entry stub of a fresh fiber.
 *
 * Syntax: GNU as, AT&T operand order.   ABI: System V AMD64.
 * In:     %r12 = arg, %r13 = fn  (seeded by srn_fiber_ctx_make)
 *
 * Not meant to be called: it is entered through the fabricated return address
 * of a fresh fiber's frame. It forwards arg as the first C argument and
 * invokes fn(arg) on a correctly aligned stack. fn must never return; if it
 * does, execution traps. Because the call goes through a register, the stub
 * carries no relocation and is position independent.
 */
        .globl  srn_fiber_trampoline
        .type   srn_fiber_trampoline, @function
srn_fiber_trampoline:
        mov     %r12, %rdi                  /* first C argument = arg */

        /* The ABI requires %rsp to be a multiple of 16 at a `call`. The
         * return-address slot that brought us here was 16-byte aligned (set up
         * by srn_fiber_ctx_make), so the `ret` left %rsp at 8 (mod 16), as on
         * any ordinary function entry. Dropping 8 more bytes restores the
         * required alignment. */
        sub     $8, %rsp

        /* fn(arg). fn is expected to leave through srn_fiber_swap when the
         * fiber yields or finishes, never by returning. */
        call    *%r13

        /* Reached only if fn returned. No caller frame exists below this one,
         * so raise #UD instead of running off the end of the stack. */
        ud2
        .size   srn_fiber_trampoline, .-srn_fiber_trampoline
145
146 /* Emit an empty .note.GNU-stack section with no flags (in particular, not
 * executable). This declares that the object needs no executable stack and
 * silences the linker, which otherwise has no such note for this
 * hand-written file. */
147 .section .note.GNU-stack,"",@progbits