Date: Thu, 12 Jun 2025 16:14:11 +0100
On Mon, Jun 9, 2025 at 11:44 PM Frederick Virchanza Gotham wrote:
>
> I've already shown that you can 'dynamic_cast' from a ' void * '
> to a ' void * ', so long as both the source and destination
> are polymorphic, but I want to take it a step further and try to
> 'dynamic_cast' from a ' void * ' to any polymorphic type.
I've been trying to use the function "abi::__dynamic_cast" found
inside libstdc++ to perform a dynamic_cast from the most-derived
object to any of its base objects (including to virtual base objects),
but I can't get it to work with virtual bases. The function doesn't
seem to be coded to do an up-cast to a virtual base.
If you write C++ code to do an up-cast from a most-derived object to a
virtual base and then compile it to assembler, you'll see that
'abi::__dynamic_cast' never actually gets called -- instead, the
compiler just hardcodes numeric offsets into the assembler, so here's
what you'll see if you try to do a dynamic_cast from the most-derived
object to a virtual base object:
# rax = rdi: start from the incoming object pointer (presumably the first argument under the System V x86-64 convention -- confirm)
mov rax, rdi
# rcx = the 8 bytes stored at the start of the object (presumably its vtable pointer)
mov rcx, qword ptr [rdi]
# add the 8-byte value stored 40 bytes before the address held in rcx; -40 is the hardcoded numeric offset
add rax, qword ptr [rcx - 40]
# return with the adjusted pointer left in rax
ret
So, seeing as how the compiler can do this operation all by itself, I
can see why "abi::__dynamic_cast" doesn't necessarily need to be able
to do it. And that's probably why it can't.
So then I switched over to looking at 'libcxxrt'. I started with the
function '__dynamic_cast' here:
https://github.com/libcxxrt/libcxxrt/blob/a6f71cbc3a1e1b8b9df241e081fa0ffdcde96249/src/dynamic_cast.cc#L200
I tried calling this function directly but just couldn't get it to
work -- it kept giving me the wrong address for virtual bases. So I
looked a little deeper, and found the most relevant piece of code here
in the implementation of "__vmi_class_type_info::__do_upcast":
https://github.com/libcxxrt/libcxxrt/blob/a6f71cbc3a1e1b8b9df241e081fa0ffdcde96249/src/dynamic_cast.cc#L146
I started copy-pasting parts here and there, and I put together the
following function:
/// Up-casts a type-erased object pointer to one of its base-class subobjects,
/// using only the run-time type information of the Itanium C++ ABI.
///
/// @param arg_most_derived  Address of the object described by @p arg_src.
/// @param arg_src           type_info of the object at @p arg_most_derived.
/// @param arg_dst           type_info of the wanted (base) class.
/// @return The address of the @p arg_dst subobject; @p arg_most_derived itself
///         when both type_infos compare equal; nullptr when the pointer is
///         null or @p arg_dst is not found among the bases of @p arg_src.
///
/// Bases are searched depth-first in the order they are listed in the
/// type_info, and the first match wins.  Accessibility (public/private) and
/// ambiguity of the base are NOT checked.
void *MyDynamicCast(
    void *const arg_most_derived,
    std::type_info const &arg_src,
    std::type_info const &arg_dst)
{
    // Like the built-in dynamic_cast, a null pointer converts to a null
    // pointer.  This also keeps us from reading a vtable through nullptr.
    if ( nullptr == arg_most_derived ) return nullptr;

    if ( arg_dst == arg_src ) return arg_most_derived;

    using std::ptrdiff_t;
    using __cxxabiv1::__class_type_info;
    using __cxxabiv1::__si_class_type_info;
    using __cxxabiv1::__vmi_class_type_info;
    using __cxxabiv1::__base_class_type_info;

    // The type_infos are deliberately kept as 'std::type_info': the
    // destination is only ever compared with operator==, and the source is
    // classified with dynamic_cast below, so no unchecked down-cast to
    // '__class_type_info' is needed (it would be undefined for non-class types).
    auto const find_base = [&arg_dst](auto &&self, void *const object,
                                      std::type_info const &src) -> void*
    {
        // First check if the source type has a single non-virtual base class
        // at offset 0x00 (i.e. __si_class)
        __si_class_type_info const *const psi =
            dynamic_cast<__si_class_type_info const*>(&src);

        if ( nullptr != psi )
        {
            __class_type_info const &base_cti = *psi->__base_type;
            if ( base_cti == arg_dst ) return object;

            // Now we go recursively to see if it's a base of a base
            // (...of a base, of a base)
            return self(self, object, base_cti);
        }

        // Otherwise it must have several and/or virtual bases; a class with
        // no bases at all (or a non-class type) has nothing to search.
        __vmi_class_type_info const *const pvmi =
            dynamic_cast<__vmi_class_type_info const *>(&src);

        if ( nullptr == pvmi ) return nullptr;

        for ( unsigned i = 0u; i < pvmi->__base_count; ++i )
        {
            __base_class_type_info const *const info = &pvmi->__base_info[i];

            ptrdiff_t offset = info->offset();

            if ( info->isVirtual() )
            {
                // For a virtual base, 'offset' is not the position of the base
                // inside the object but the (negative) byte offset, relative
                // to the vtable pointer, of the vtable slot that holds it.
                ptrdiff_t const *p_offset =
                    *static_cast<ptrdiff_t const *const *>(object);

                // Divide as SIGNED values: 'offset / sizeof(ptrdiff_t)' would
                // convert the negative offset to an unsigned size_t first.
                p_offset += offset / static_cast<ptrdiff_t>(sizeof(ptrdiff_t));
                offset = *p_offset;
            }

            __class_type_info const &base_cti = *info->__base_type;

            char *const target = static_cast<char*>(object) + offset;

            if ( base_cti == arg_dst ) return target;

            // Now we go recursively to see if it's a base of a base
            // (...of a base, of a base)
            // IMPORTANT NOTE: the recursion starts from 'target', i.e. from
            // the already adjusted address of this base subobject.
            void *const retval = self(self, target, base_cti);
            if ( nullptr != retval ) return retval;
        }

        return nullptr;
    };

    return find_base(find_base, arg_most_derived, arg_src);
}
I've done some testing, and it seems that this function works
perfectly all the time. I've tried torturing it with the following
class hierarchy:
// Torture-test hierarchy: mixes a non-polymorphic base, multiple inheritance
// and virtual inheritance at two different levels.
struct Base0 { virtual ~Base0(){} };                           // polymorphic
struct Base1 { void *p[600]; };                                // -- NOT polymorphic
struct Base2 { virtual ~Base2(){} };                           // polymorphic
struct Base3 { virtual ~Base3(){} };                           // polymorphic
struct Base3a : Base0, Base3 {};                               // polymorphic
struct Base3b : Base1, virtual Base3a {};                      // polymorphic
struct BaseMiddle : Base3b { virtual ~BaseMiddle(){} };        // polymorphic
struct Base4 { virtual ~Base4(){} };                           // polymorphic
struct Derived : virtual Base2, virtual BaseMiddle, Base4 {};  // polymorphic
And here's the full code on Godbolt, tested and working with GNU g++,
LLVM clang++, and Intel ICX:
https://godbolt.org/z/6h4roc7ee
Conclusion: With the Itanium C++ ABI, it's possible to perform a
dynamic_cast from a type-erased pointer to a polymorphic object. This
means that 'std::polyhandle' can have a method called 'dynamicCast'
for all Itanium compilers.
Next, we have to see what's possible with the Microsoft ABI.
>
> I've already shown that you can 'dynamic_cast' from a ' void * '
> to a ' void * ', so long as both the source and destination
> are polymorphic, but I want to take it a step further and try to
> 'dynamic_cast' from a ' void * ' to any polymorphic type.
I've been trying to use the function "abi::__dynamic_cast" found
inside libstdc++ to perform a dynamic_cast from the most-derived
object to any of its base objects (including to virtual base objects),
but I can't get it to work with virtual bases. The function doesn't
seem to be coded to do an up-cast to a virtual base.
If you write C++ code to do an up-cast from a most-derived object to a
virtual base and then compile it to assembler, you'll see that
'abi::__dynamic_cast' never actually gets called -- instead, the
compiler just hardcodes numeric offsets into the assembler, so here's
what you'll see if you try to do a dynamic_cast from the most-derived
object to a virtual base object:
# rax = rdi: start from the incoming object pointer (presumably the first argument under the System V x86-64 convention -- confirm)
mov rax, rdi
# rcx = the 8 bytes stored at the start of the object (presumably its vtable pointer)
mov rcx, qword ptr [rdi]
# add the 8-byte value stored 40 bytes before the address held in rcx; -40 is the hardcoded numeric offset
add rax, qword ptr [rcx - 40]
# return with the adjusted pointer left in rax
ret
So, seeing as how the compiler can do this operation all by itself, I
can see why "abi::__dynamic_cast" doesn't necessarily need to be able
to do it. And that's probably why it can't.
So then I switched over to looking at 'libcxxrt'. I started with the
function '__dynamic_cast' here:
https://github.com/libcxxrt/libcxxrt/blob/a6f71cbc3a1e1b8b9df241e081fa0ffdcde96249/src/dynamic_cast.cc#L200
I tried calling this function directly but just couldn't get it to
work -- it kept giving me the wrong address for virtual bases. So I
looked a little deeper, and found the most relevant piece of code here
in the implementation of "__vmi_class_type_info::__do_upcast":
https://github.com/libcxxrt/libcxxrt/blob/a6f71cbc3a1e1b8b9df241e081fa0ffdcde96249/src/dynamic_cast.cc#L146
I started copy-pasting parts here and there, and I put together the
following function:
/// Up-casts a type-erased object pointer to one of its base-class subobjects,
/// using only the run-time type information of the Itanium C++ ABI.
///
/// @param arg_most_derived  Address of the object described by @p arg_src.
/// @param arg_src           type_info of the object at @p arg_most_derived.
/// @param arg_dst           type_info of the wanted (base) class.
/// @return The address of the @p arg_dst subobject; @p arg_most_derived itself
///         when both type_infos compare equal; nullptr when the pointer is
///         null or @p arg_dst is not found among the bases of @p arg_src.
///
/// Bases are searched depth-first in the order they are listed in the
/// type_info, and the first match wins.  Accessibility (public/private) and
/// ambiguity of the base are NOT checked.
void *MyDynamicCast(
    void *const arg_most_derived,
    std::type_info const &arg_src,
    std::type_info const &arg_dst)
{
    // Like the built-in dynamic_cast, a null pointer converts to a null
    // pointer.  This also keeps us from reading a vtable through nullptr.
    if ( nullptr == arg_most_derived ) return nullptr;

    if ( arg_dst == arg_src ) return arg_most_derived;

    using std::ptrdiff_t;
    using __cxxabiv1::__class_type_info;
    using __cxxabiv1::__si_class_type_info;
    using __cxxabiv1::__vmi_class_type_info;
    using __cxxabiv1::__base_class_type_info;

    // The type_infos are deliberately kept as 'std::type_info': the
    // destination is only ever compared with operator==, and the source is
    // classified with dynamic_cast below, so no unchecked down-cast to
    // '__class_type_info' is needed (it would be undefined for non-class types).
    auto const find_base = [&arg_dst](auto &&self, void *const object,
                                      std::type_info const &src) -> void*
    {
        // First check if the source type has a single non-virtual base class
        // at offset 0x00 (i.e. __si_class)
        __si_class_type_info const *const psi =
            dynamic_cast<__si_class_type_info const*>(&src);

        if ( nullptr != psi )
        {
            __class_type_info const &base_cti = *psi->__base_type;
            if ( base_cti == arg_dst ) return object;

            // Now we go recursively to see if it's a base of a base
            // (...of a base, of a base)
            return self(self, object, base_cti);
        }

        // Otherwise it must have several and/or virtual bases; a class with
        // no bases at all (or a non-class type) has nothing to search.
        __vmi_class_type_info const *const pvmi =
            dynamic_cast<__vmi_class_type_info const *>(&src);

        if ( nullptr == pvmi ) return nullptr;

        for ( unsigned i = 0u; i < pvmi->__base_count; ++i )
        {
            __base_class_type_info const *const info = &pvmi->__base_info[i];

            ptrdiff_t offset = info->offset();

            if ( info->isVirtual() )
            {
                // For a virtual base, 'offset' is not the position of the base
                // inside the object but the (negative) byte offset, relative
                // to the vtable pointer, of the vtable slot that holds it.
                ptrdiff_t const *p_offset =
                    *static_cast<ptrdiff_t const *const *>(object);

                // Divide as SIGNED values: 'offset / sizeof(ptrdiff_t)' would
                // convert the negative offset to an unsigned size_t first.
                p_offset += offset / static_cast<ptrdiff_t>(sizeof(ptrdiff_t));
                offset = *p_offset;
            }

            __class_type_info const &base_cti = *info->__base_type;

            char *const target = static_cast<char*>(object) + offset;

            if ( base_cti == arg_dst ) return target;

            // Now we go recursively to see if it's a base of a base
            // (...of a base, of a base)
            // IMPORTANT NOTE: the recursion starts from 'target', i.e. from
            // the already adjusted address of this base subobject.
            void *const retval = self(self, target, base_cti);
            if ( nullptr != retval ) return retval;
        }

        return nullptr;
    };

    return find_base(find_base, arg_most_derived, arg_src);
}
I've done some testing, and it seems that this function works
perfectly all the time. I've tried torturing it with the following
class hierarchy:
// Torture-test hierarchy: mixes a non-polymorphic base, multiple inheritance
// and virtual inheritance at two different levels.
struct Base0 { virtual ~Base0(){} };                           // polymorphic
struct Base1 { void *p[600]; };                                // -- NOT polymorphic
struct Base2 { virtual ~Base2(){} };                           // polymorphic
struct Base3 { virtual ~Base3(){} };                           // polymorphic
struct Base3a : Base0, Base3 {};                               // polymorphic
struct Base3b : Base1, virtual Base3a {};                      // polymorphic
struct BaseMiddle : Base3b { virtual ~BaseMiddle(){} };        // polymorphic
struct Base4 { virtual ~Base4(){} };                           // polymorphic
struct Derived : virtual Base2, virtual BaseMiddle, Base4 {};  // polymorphic
And here's the full code on Godbolt, tested and working with GNU g++,
LLVM clang++, and Intel ICX:
https://godbolt.org/z/6h4roc7ee
Conclusion: With the Itanium C++ ABI, it's possible to perform a
dynamic_cast from a type-erased pointer to a polymorphic object. This
means that 'std::polyhandle' can have a method called 'dynamicCast'
for all Itanium compilers.
Next, we have to see what's possible with the Microsoft ABI.
Received on 2025-06-12 15:14:25