C++ Logo

std-proposals

Advanced search

Re: [std-proposals] std::polyhandle (Draft Paper Attached)

From: Frederick Virchanza Gotham <cauldwell.thomas_at_[hidden]>
Date: Sat, 14 Jun 2025 01:29:45 +0100
On Thu, Jun 12, 2025 at 4:14 PM Frederick Virchanza Gotham wrote:
>
> Conclusion: With the Itanium C++ ABI, it's possible to perform a
> dynamic_cast from a type-erased pointer to a polymorphic object. This
> means that 'std::polyhandle' can have a method called 'dynamicCast'
> for all Itanium compilers.
>
> Next we've to see what's possible with the Microsoft ABI.


Just now I got 'dynamic_cast' from 'void*' working on the Microsoft
compiler. (Note that I'm talking about _FROM_ a void*, not _TO_ a
void*).

So it looks like we can implement std::polyhandle::dynamicCast on both
the Itanium ABI and the Microsoft ABI.

It was a little tricky. We need to invoke '__RTDynamicCast', not
giving it the type_info of the most-derived object, but rather giving
it the type_info of the current object. To find the type_info of the
current object, I get the RTTICompleteObjectLocator for the current
object which I use to get the address of the most-derived object. I
then walk through all of the bases in the hierarchy struct found
inside the RTTICompleteObjectLocator, and each base has a numerical
offset from the most-derived object. So one by one, I compare the
address of the current object to the address of most_derived +
base_offset. When I get a match, I pluck out the type_info for that
base type, and that's what I pass to '__RTDynamicCast'. It works.

More than one of the bases can have the same offset from the
most-derived object. But actually this doesn't matter -- any of them
will suffice so long as the offset is correct. For instance if you
have a 'stringstream' object and you try to find the 'ostream', then
you get the 'ostream' -- but if you try to find the 'istream', you
actually get back some other base class which has the same offset as
'istream' -- but it still works fine.

The code I wrote for the Itanium ABI, as well as this code for the
Microsoft compiler, doesn't ignore invisible bases (i.e. private /
protected bases). I was thinking maybe 'std::polyhandle::dynamicCast'
could give you the choice as follows:

    template<typename T>
    T std::polyhandle::dynamicCast(bool include_invisible = false);

So if you use it normally:

    mypolyhandle.dynamicCast<ostream>();

then it won't give you back a private/protected base. However if you do:

    mypolyhandle.dynamicCast<ostream>(true);

then it will also give you a private/protected base -- although maybe
this would not be a good feature, and maybe it wouldn't be predictable
if there's a class that gets inherited more than once in the entire
class hierarchy (e.g. inherited twice privately and once publicly).

Anyway here's the code I wrote just now for the Microsoft compiler:

      https://godbolt.org/z/v6dKjrhYc

And here it is copy-pasted:

#include <cstdint> // int32_t, uint32_t
#include <cstdlib> // abort
#include <memory> // addressof
#include <type_traits> // is_polymorphic, remove_cvref
#include <typeinfo> // type_info
#include <utility> // unreachable

using std::int32_t, std::uint32_t;

// Record that GetCurrentObjectStaticTypeInfo reaches through vtable[-1].
// pClassDescriptor and pSelf are consumed there as signed byte offsets
// relative to an image base rather than as raw pointers.
struct RTTICompleteObjectLocator {
    uint32_t signature;        // not read by the code in this file
    uint32_t offset;           // combined with the caller's vfptr offset to find the most-derived object
    uint32_t cdOffset;         // not read by the code in this file
     int32_t pTypeDescriptor;  // not read by the code in this file
     int32_t pClassDescriptor; // image-base-relative offset of the RTTIClassHierarchyDescriptor
     int32_t pSelf;            // offset of this record from the image base (imageBase = &locator - pSelf)
};

// Declared alongside the other RTTI records, but no code in this file reads it.
// NOTE(review): presumably the layout of the record that the pTypeDescriptor
// offsets point at (which the code reinterprets as std::type_info) -- confirm.
struct TypeDescriptor {
    void const *pVFTable;
    void *spare;
    char name[]; // flexible array member: not standard C++, relies on a compiler extension
};

// Header of the per-class hierarchy description; located via
// RTTICompleteObjectLocator::pClassDescriptor.
struct RTTIClassHierarchyDescriptor {
    uint32_t signature;       // not read by the code in this file
    uint32_t attributes;      // not read by the code in this file
    uint32_t numBaseClasses;  // entry count of the base-class array (loop bound when walking it)
     int32_t pBaseClassArray; // image-base-relative offset of the RTTIBaseClassArray
};

// Array of image-base-relative offsets; each entry locates one
// RTTIBaseClassDescriptor. Its length is given by
// RTTIClassHierarchyDescriptor::numBaseClasses.
struct RTTIBaseClassArray {
    int32_t arrayOfBaseClassDescriptors[]; // flexible array member (compiler extension in C++)
};

// Describes one class in a complete object's hierarchy: which type it is and
// how to find its subobject starting from the most-derived object.
struct RTTIBaseClassDescriptor {
     int32_t pTypeDescriptor;   // image-base-relative offset; the target is reinterpreted as std::type_info
    uint32_t numContainedBases; // not read by the code in this file
    struct PMD {
        int32_t mdisp; // byte displacement used when locating the base subobject
        int32_t pdisp; // -1: located by mdisp alone; otherwise byte offset, inside the
                       // most-derived object, of a pointer to a table of displacements
        int32_t vdisp; // byte offset inside that table of the int32_t displacement entry
    } where;
    uint32_t attributes;        // not read by the code in this file
     int32_t pClassDescriptor;  // not read by the code in this file
};

extern "C" {
    // Including <Windows.h> is too much
    void *__stdcall GetModuleHandleA(char const*);
    void *__stdcall LoadLibraryA (char const*);
    void *__stdcall GetProcAddress (void*,char const*);
}

// Most recent pointer passed to the __RTCastToVoid defined in this file, kept
// per thread. GetOffsetToVftable reads it back right after a
// dynamic_cast<void*> to work out where the vfptr lies inside an object.
thread_local void const *argument_to_RTCTV = nullptr;

extern "C" {
inline void *__RTCastToVoid(void *const arg) noexcept(false)
{
    argument_to_RTCTV = arg; // save this to do a subtraction later!

    void *hRuntime = ::GetModuleHandleA("vcruntime140.dll");
    if ( nullptr == hRuntime ) hRuntime = ::LoadLibraryA("vcruntime140.dll");
    if ( nullptr == hRuntime ) return nullptr;
    auto const fp = (void*(*)(void *)) ::GetProcAddress(hRuntime,
"__RTCastToVoid");
    if ( nullptr == fp ) return nullptr;
    return fp(arg);
}
}

// Returns the byte distance from `obj` to the pointer that reaches
// __RTCastToVoid when `obj` is cast with dynamic_cast<void*>. That pointer is
// captured in argument_to_RTCTV by the __RTCastToVoid defined in this file.
template<class Tref>
requires std::is_polymorphic_v< std::remove_cvref_t<Tref> >
std::uint32_t GetOffsetToVftable(Tref &&obj)
{
    using Object = std::remove_cvref_t<Tref>;

    Object *const self = const_cast<Object*>( std::addressof(obj) );

    // Evaluated purely for its side effect of updating argument_to_RTCTV.
    static_cast<void>( dynamic_cast<void*>(self) );

    char const *const recorded = static_cast<char const*>(argument_to_RTCTV);
    char const *const start    = reinterpret_cast<char const*>(self);
    return static_cast<std::uint32_t>(recorded - start);
}

#include <cassert> // assert ------------------------------------ REMOVE THIS
#include <iostream> // cout, endl -------------------------------- REMOVE THIS
using std::cout, std::endl;

std::type_info const &GetCurrentObjectStaticTypeInfo(void *const obj,
std::uint32_t const offset_to_vftable_inside_object)
{
    cout << "GetTypeDescriptor called with obj=" << obj << ",
offset_to_vftable_inside_object=" << offset_to_vftable_inside_object
<< endl;

    // Step 1: Get address of vtable pointer
    void ***const ppvtable = (void***)( (char*)obj +
offset_to_vftable_inside_object );
    cout << "Address of vtable pointer = " << (void*)ppvtable << endl;

    // Step 2: Get vtable pointer
    void **const pvtable = *ppvtable;
    cout << "Address of vtable = " << (void*)pvtable << endl;

    // Step 3: Get RTTICompleteObjectLocator from vtable[-1]
    auto &locator = *(RTTICompleteObjectLocator*)pvtable[-1];
    cout << "Address of Locator = " << (void*)&locator << endl;

    std::uint32_t const offset_from_most_derived_to_base =
locator.offset - offset_to_vftable_inside_object;
    cout << "Offset to most-derived = " <<
offset_from_most_derived_to_base << endl;

    // Step 4: Compute most-derived object
    char *const most_derived = (char*)obj - offset_from_most_derived_to_base;
    cout << "Address of most derived = " << (void*)most_derived << endl;

    char *const imageBase = (char*)&locator - locator.pSelf;

    // Step 5: Walk base class descriptors
    auto *const chd = (RTTIClassHierarchyDescriptor*)(imageBase +
locator.pClassDescriptor);
    cout << "Address of hierarchy struct: " << (void*)chd << endl;

    cout << "Count bases = " << chd->numBaseClasses << endl;

    auto *const baseArray = (RTTIBaseClassArray*)(imageBase +
chd->pBaseClassArray);
    cout << "Address of array of Base descriptors: " <<
(void*)baseArray << endl;

    for ( unsigned i = 1u; i < chd->numBaseClasses; ++i )
    {
        cout << "Base No. " << i;

        auto *const base = (RTTIBaseClassDescriptor *)(imageBase +
baseArray->arrayOfBaseClassDescriptors[i]);

        cout << ", mdisp = " << base->where.mdisp
             << ", pdisp = " << base->where.pdisp
             << ", vdisp = " << base->where.vdisp
             << ", ";

        char *base_object = most_derived;

        if ( -1 == base->where.pdisp ) base_object += base->where.mdisp;
        else
        {
            char *const vbtable_ptr = *(char**)(most_derived +
base->where.pdisp);
            int32_t const dynamic_offset = *(int32_t*)(vbtable_ptr +
base->where.vdisp);
            base_object += dynamic_offset;
        }

        cout << "base object address = " << (void*)base_object << endl;
        std::type_info &ti = *(std::type_info*)(imageBase +
base->pTypeDescriptor);
        if ( base_object == obj )
        {
            cout << "\nReturning type_info for: " << ti.name() << endl;
            return ti;
        }
    }

    assert( nullptr == "Control should never reach here" );
    std::abort(); // if NDEBUG
}

#include <sstream> // stringstream

void *MyDynamicCast(
    void *const obj,
    uint32_t offset_to_vtable_pointer,
    std::type_info const &dst)
{
    std::type_info const &ti = GetCurrentObjectStaticTypeInfo(obj,
offset_to_vtable_pointer);

    void *hRuntime = ::GetModuleHandleA("vcruntime140.dll");
    if ( nullptr == hRuntime ) hRuntime = ::LoadLibraryA("vcruntime140.dll");
    if ( nullptr == hRuntime ) return nullptr;
    auto const fp = (void *(__cdecl *)(void*,long,void*,void*,int))
::GetProcAddress(hRuntime, "__RTDynamicCast");
    if ( nullptr == fp ) return nullptr;
    return fp(
        obj,
        offset_to_vtable_pointer,
        (void*)&ti,
        (void*)&dst,
        0);
}

// Test hierarchy mixing virtual and non-virtual inheritance, polymorphic and
// non-polymorphic bases, used by main() to exercise MyDynamicCast.
struct Base0 { virtual ~Base0(){} };                           // polymorphic
struct Base1 { void *p[600]; };                                // -- NOT polymorphic
struct Base2 { virtual ~Base2(){} };                           // polymorphic
struct Base3 { virtual ~Base3(){} };                           // polymorphic
struct Base3a : Base0, Base3 {};                               // polymorphic
struct Base3b : Base1, virtual Base3a {};                      // polymorphic
struct BaseMiddle : Base3b { virtual ~BaseMiddle(){} };        // polymorphic
struct Base4 { virtual ~Base4(){} };                           // polymorphic
struct Derived : virtual Base2, virtual BaseMiddle, Base4 {};  // polymorphic

// Reference slot for the compiler's own dynamic_cast, printed next to Func2's
// result in main(). The real cast is disabled, so this always returns nullptr.
template<class T>
void *Func1(Base4 *const pb4)
{
    // Let's see what the compiler gets by itself
    //return dynamic_cast<T*>(pb4); -- I have to comment this out because it breaks __RTCastToVoid
    (void)pb4; // unused while the cast above stays disabled
    return nullptr;
}

// Casts `pb4` to T through the hand-rolled path: measures where the vfptr sits
// inside the Base4 subobject, then lets MyDynamicCast do the rest on the
// type-erased pointer.
template<class T>
void *Func2(Base4 *const pb4)
{
    auto const vfptr_offset = GetOffsetToVftable(*pb4);
    std::type_info const &wanted = typeid(T);
    void *const erased = pb4;
    return MyDynamicCast(erased, vfptr_offset, wanted);
}

int main(void)
{
    using std::cout, std::endl;

    Derived objz;
    cout << "Derived : " << (void*)&objz << endl;
    cout << "Base0 : " << (void*)static_cast<Base0 *>(&objz) << endl;
    cout << "Base1 : " << (void*)static_cast<Base1 *>(&objz) << endl;
    cout << "Base2 : " << (void*)static_cast<Base2 *>(&objz) << endl;
    cout << "Base3 : " << (void*)static_cast<Base3 *>(&objz) << endl;
    cout << "Base3a : " << (void*)static_cast<Base3a*>(&objz) << endl;
    cout << "Base3b : " << (void*)static_cast<Base3b*>(&objz) << endl;
    cout << "BaseMiddle : " << (void*)static_cast<BaseMiddle*>(&objz) << endl;
    cout << "Base4 : " << (void*)static_cast<Base4 *>(&objz) << endl;
    cout << "===========================\n";

    Base4 *const pb4 = &objz;

    cout << "Func1 - Base3a : " << Func1<Base3b>(pb4) << endl;
    cout << "Func2 - Base3a : " << Func2<Base3b>(pb4) << endl;
}

Received on 2025-06-14 00:29:59