Date: Wed, 22 May 2024 11:03:52 +0100
On Wed, May 22, 2024 at 4:00 AM Arthur O'Dwyer wrote:
> auto getLockedMutex(std::mutex *pm) {
> Auto(pm->lock());
> return std::mutex();
> }
> int main() {
> std::mutex m = getLockedMutex(&m);
> m.unlock();
> }
I had to write this out step by step to understand it. So we start with:
auto getLockedMutex(std::mutex *pm)
return std::mutex();
which is equivalent to:
std::mutex getLockedMutex(std::mutex *pm)
return std::mutex();
which is equivalent to:
std::mutex getLockedMutex(std::mutex *pm)
using std::mutex;
struct S { mutex &m; S(mutex &arg) : m(arg) {} ~S(){ m.lock();
} } s(*pm);
return mutex();
which is equivalent on x86_64 and most other platforms to:
void getLockedMutex(std::mutex *arg0, std::mutex *arg1)
using std::mutex;
struct S { mutex &m; S(mutex &arg) : m(arg) {} ~S(){ m.lock();
} } s(*arg1);
::new(arg0) std::mutex;
To be sure, I checked the assembler produced on Godbolt. The first and
last functions start off identically:
push rbx
mov rbx, rdi
xorps xmm0, xmm0
movups xmmword ptr [rdi + 16], xmm0
movups xmmword ptr [rdi], xmm0
mov qword ptr [rdi + 32], 0
mov rdi, rsi
call pthread_mutex_lock_at_PLT
test eax, eax
jne Something_Went_Wrong
mov rax, rbx
pop rbx
mov edi, eax
call std::__throw_system_error(int)@PLT
But the last function has two more instructions tagged onto the end:
mov rdi, rax
call __clang_call_terminate
The difference is that in Arthur's original function, if "lock"
throws, then the mutex must get destroyed before the function returns
-- so then I looked through the assembler trying to find the call to
the destructor... but I couldn't find it (then I realised that
"mutex::~mutex" doesn't do anything). But there's another difference:
if "lock" throws in Arthur's code, then std::terminate is called
because the exception escapes the destructor of a local object (and
destructors are implicitly noexcept), as demonstrated here:
(By the way, I suggested to Arthur in an email a few months ago that
he should enclose the body of Auto's destructor in a "try/catch(...)".)
Let's see what happens though with a class type that has a destructor
with an observable effect. So let's start off with:
struct S {
inline static volatile int n = 0;
S(S const & ) = delete;
S(S &&) = delete;
S() __attribute__ ((noinline)) { n = 1; }
~S() __attribute__ ((noinline)) { n = 2; }
void Throw(void) noexcept(false) __attribute__ ((noinline)) {
if ( 666 != n ) throw n; }
And then we have Arthur's style of function:
S Func(S *p)
struct X { S &s; X(S &arg) : s(arg) {} ~X(){ s.Throw(); } } x(*p);
return S();
And then we have my own style of function:
void Func2_(S *arg0, S *arg1)
struct X { S &s; X(S &arg) : s(arg) {} ~X(){ s.Throw(); } } x(*arg1);
::new(arg0) S();
Arthur's style becomes:
push r14
push rbx
push rax
mov r14, rsi
mov rbx, rdi
call S::S() [base object constructor]
mov rdi, r14
call S::Throw()
mov rax, rbx
add rsp, 8
pop rbx
pop r14
mov rdi, rax
call __clang_call_terminate
While my own style becomes:
push rbx
mov rbx, rsi
call S::S() [base object constructor]
mov rdi, rbx
call S::Throw()
pop rbx
mov rdi, rax
call __clang_call_terminate
Neither of them invokes the destructor of S. If we were to edit
Arthur's style to manually invoke the destructor as follows:
S Func1(S *p)
struct X {
S &s;
X(S &arg) : s(arg) {}
try { s.Throw(); }
catch(...) { s.~S(); throw; }
} x(*p);
return S();
Then the assembler becomes:
push r14
push rbx
push rax
mov rbx, rsi
mov r14, rdi
call S::S() [base object constructor]
mov rdi, rbx
call S::Throw()
mov rax, r14
add rsp, 8
pop rbx
pop r14
mov rdi, rax
call __cxa_begin_catch
mov rdi, rbx
call S::~S() [base object destructor]
call __cxa_rethrow
mov rdi, rax
call __clang_call_terminate
This one is a little better as we can see the destructor getting
called, but when we try to re-throw the exception, std::terminate gets
called. So this isn't exception-safe.
Of all the ways of achieving NRVO shared so far on this mailing list
in order to return a locked mutex by value from a function, I think
the best one is as follows: Write a function that returns a
Derived<T> instead of a T, and then inside the constructor of
Derived<T>, lock the mutex. Then take the address of the following
function:
Derived<T> Func(void);
and cast it to:
T (*)(void);
Invoke the function pointer, and voilà, you get back a locked mutex.
Maybe put in a few static_asserts to make sure that Derived<T> is
identical in size and alignment to T.
Or of course there's always the idea of giving us full access to the
return slot:
> auto getLockedMutex(std::mutex *pm) {
> Auto(pm->lock());
> return std::mutex();
> }
> int main() {
> std::mutex m = getLockedMutex(&m);
> m.unlock();
> }
I had to write this out step by step to understand it. So we start with:
auto getLockedMutex(std::mutex *pm)
return std::mutex();
which is equivalent to:
std::mutex getLockedMutex(std::mutex *pm)
return std::mutex();
which is equivalent to:
std::mutex getLockedMutex(std::mutex *pm)
using std::mutex;
struct S { mutex &m; S(mutex &arg) : m(arg) {} ~S(){ m.lock();
} } s(*pm);
return mutex();
which is equivalent on x86_64 and most other platforms to:
void getLockedMutex(std::mutex *arg0, std::mutex *arg1)
using std::mutex;
struct S { mutex &m; S(mutex &arg) : m(arg) {} ~S(){ m.lock();
} } s(*arg1);
::new(arg0) std::mutex;
To be sure, I checked the assembler produced on Godbolt. The first and
last functions start off identically:
push rbx
mov rbx, rdi
xorps xmm0, xmm0
movups xmmword ptr [rdi + 16], xmm0
movups xmmword ptr [rdi], xmm0
mov qword ptr [rdi + 32], 0
mov rdi, rsi
call pthread_mutex_lock_at_PLT
test eax, eax
jne Something_Went_Wrong
mov rax, rbx
pop rbx
mov edi, eax
call std::__throw_system_error(int)@PLT
But the last function has two more instructions tagged onto the end:
mov rdi, rax
call __clang_call_terminate
The difference is that in Arthur's original function, if "lock"
throws, then the mutex must get destroyed before the function returns
-- so then I looked through the assembler trying to find the call to
the destructor... but I couldn't find it (then I realised that
"mutex::~mutex" doesn't do anything). But there's another difference:
if "lock" throws in Arthur's code, then std::terminate is called
because the exception escapes the destructor of a local object (and
destructors are implicitly noexcept), as demonstrated here:
(By the way, I suggested to Arthur in an email a few months ago that
he should enclose the body of Auto's destructor in a "try/catch(...)".)
Let's see what happens though with a class type that has a destructor
with an observable effect. So let's start off with:
struct S {
inline static volatile int n = 0;
S(S const & ) = delete;
S(S &&) = delete;
S() __attribute__ ((noinline)) { n = 1; }
~S() __attribute__ ((noinline)) { n = 2; }
void Throw(void) noexcept(false) __attribute__ ((noinline)) {
if ( 666 != n ) throw n; }
And then we have Arthur's style of function:
S Func(S *p)
struct X { S &s; X(S &arg) : s(arg) {} ~X(){ s.Throw(); } } x(*p);
return S();
And then we have my own style of function:
void Func2_(S *arg0, S *arg1)
struct X { S &s; X(S &arg) : s(arg) {} ~X(){ s.Throw(); } } x(*arg1);
::new(arg0) S();
Arthur's style becomes:
push r14
push rbx
push rax
mov r14, rsi
mov rbx, rdi
call S::S() [base object constructor]
mov rdi, r14
call S::Throw()
mov rax, rbx
add rsp, 8
pop rbx
pop r14
mov rdi, rax
call __clang_call_terminate
While my own style becomes:
push rbx
mov rbx, rsi
call S::S() [base object constructor]
mov rdi, rbx
call S::Throw()
pop rbx
mov rdi, rax
call __clang_call_terminate
Neither of them invokes the destructor of S. If we were to edit
Arthur's style to manually invoke the destructor as follows:
S Func1(S *p)
struct X {
S &s;
X(S &arg) : s(arg) {}
try { s.Throw(); }
catch(...) { s.~S(); throw; }
} x(*p);
return S();
Then the assembler becomes:
push r14
push rbx
push rax
mov rbx, rsi
mov r14, rdi
call S::S() [base object constructor]
mov rdi, rbx
call S::Throw()
mov rax, r14
add rsp, 8
pop rbx
pop r14
mov rdi, rax
call __cxa_begin_catch
mov rdi, rbx
call S::~S() [base object destructor]
call __cxa_rethrow
mov rdi, rax
call __clang_call_terminate
This one is a little better as we can see the destructor getting
called, but when we try to re-throw the exception, std::terminate gets
called. So this isn't exception-safe.
Of all the ways of achieving NRVO shared so far on this mailing list
in order to return a locked mutex by value from a function, I think
the best one is as follows: Write a function that returns a
Derived<T> instead of a T, and then inside the constructor of
Derived<T>, lock the mutex. Then take the address of the following
function:
Derived<T> Func(void);
and cast it to:
T (*)(void);
Invoke the function pointer, and voilà, you get back a locked mutex.
Maybe put in a few static_asserts to make sure that Derived<T> is
identical in size and alignment to T.
Or of course there's always the idea of giving us full access to the
return slot:
Received on 2024-05-22 10:04:06