C++ Logo

std-proposals

Advanced search

Re: [std-proposals] Interceptor Function (preserve stack and all registers)

From: Frederick Virchanza Gotham <cauldwell.thomas_at_[hidden]>
Date: Sun, 12 Apr 2026 02:23:11 +0100
On Sun, Apr 12, 2026 at 1:37 AM Thiago Macieira wrote:
>
> It isn't designed for a tail call. In fact, one major problem of your proposal
> is that you need one register to be clobbered because you need to save the
> address to which you'll jump in it.


What about putting the address inside a thread_local variable?


> If we stick to the general ABIs, then there are several caller-save
> registers that aren't used for parameter passing, and one of them is specifically
> reserved for function call thunks to use (I think it's r11).


Here's what I'm thinking for System V x86_64 to accommodate __m512:

    .macro __builtin_push_all_argument_registers_onto_stack
        sub $576, %rsp # 56 + (8 * 64) + 8 = 576 bytes
        mov %rax, 0(%rsp) # save %al too, for SysV varargs
        mov %rdi, 8(%rsp)
        mov %rsi, 16(%rsp)
        mov %rdx, 24(%rsp)
        mov %rcx, 32(%rsp)
        mov %r8, 40(%rsp)
        mov %r9, 48(%rsp)
        vmovdqu64 %zmm0, 56(%rsp)
        vmovdqu64 %zmm1, 120(%rsp)
        vmovdqu64 %zmm2, 184(%rsp)
        vmovdqu64 %zmm3, 248(%rsp)
        vmovdqu64 %zmm4, 312(%rsp)
        vmovdqu64 %zmm5, 376(%rsp)
        vmovdqu64 %zmm6, 440(%rsp)
        vmovdqu64 %zmm7, 504(%rsp)
        # jump address gets stored later at 568(%rsp)
    .endm

    .macro __builtin_pop_all_arguments_off_stack_and_perform_tailcall
        mov 0(%rsp), %rax
        mov 8(%rsp), %rdi
        mov 16(%rsp), %rsi
        mov 24(%rsp), %rdx
        mov 32(%rsp), %rcx
        mov 40(%rsp), %r8
        mov 48(%rsp), %r9
        vmovdqu64 56(%rsp), %zmm0
        vmovdqu64 120(%rsp), %zmm1
        vmovdqu64 184(%rsp), %zmm2
        vmovdqu64 248(%rsp), %zmm3
        vmovdqu64 312(%rsp), %zmm4
        vmovdqu64 376(%rsp), %zmm5
        vmovdqu64 440(%rsp), %zmm6
        vmovdqu64 504(%rsp), %zmm7
        add $576, %rsp
        jmp *-8(%rsp)
    .endm

I think that will work perfectly for every conceivable function call
under System V x86_64.

Received on 2026-04-12 01:23:24