Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Set stack size programmatically on Windows

Is it possible in WinAPI to set stack size for the current thread at runtime like setrlimit does on Linux? I mean to increase the reserved stack size for the current thread if it is too small for the current requirements. This is in a library that may be called by threads from other programming languages, so it's not an option to set stack size at compile time.

If not, any ideas about a solution like an assembly trampoline that changes the stack pointer to a dynamically allocated memory block?

FAQ: Proxy thread is a surefire solution (unless the caller thread has extremely small stack). However, thread switching seems a performance killer. I need substantial amount of stack for recursion or for _alloca. This is also for performance, because heap allocation is slow, especially if multiple threads allocate from heap in parallel (they get blocked by the same libc/CRT mutex, so the code becomes serial).

like image 303
Serge Rogatch Avatar asked Jul 22 '17 18:07

Serge Rogatch


2 Answers

You cannot fully swap the stack of the current thread (allocate a new one yourself and delete the old one) in library code, because the old stack holds return addresses, possibly pointers to variables on the stack, etc.

And you cannot expand the stack: the virtual memory for it is already allocated (reserved/committed) and is not expandable.

However, it is possible to allocate a temporary stack and switch to it for the duration of a call. In this case you must save the old StackBase and StackLimit from NT_TIB (see this structure in winnt.h), set the new values (you need to allocate memory for the new stack), make the call (switching stacks requires some assembly code - you cannot do it in C/C++ alone) and then restore the original StackBase and StackLimit. Kernel mode has built-in support for this - KeExpandKernelStackAndCallout.

However, user mode has Fibers - they are very rarely used, but they look like a perfect match for this task. With a fiber we can create an additional stack/execution context inside the current thread.

So in general the solution (for a library) is as follows:

on DLL_THREAD_ATTACH :

  1. convert the thread to a fiber (ConvertThreadToFiber); if it returns false, also check GetLastError for ERROR_ALREADY_FIBER - this is also an acceptable result
  2. and create own Fiber by call CreateFiberEx

We do this only once. Then, every time a procedure of yours that requires a large stack is called:

  1. remember the current fiber by call GetCurrentFiber
  2. setup task for your fiber
  3. switch to your fiber by call SwitchToFiber
  4. call procedure inside fiber
  5. return to original fiber (saved from call GetCurrentFiber) again by SwitchToFiber

and finally on DLL_THREAD_DETACH you need:

  1. delete your fiber by DeleteFiber
  2. convert the fiber back to a thread by calling ConvertFiberToThread, but only if the initial ConvertThreadToFiber returned true (if the error was ERROR_ALREADY_FIBER, let whoever first converted the thread to a fiber convert it back - this is not your job in that case)

You need some (usually small) data associated with your fiber/thread. This must of course be a per-thread variable, so declare it with __declspec(thread), or use TLS directly (or whatever modern C++ feature exists for this, such as thread_local).

demo implementation is next:

// Signature of a routine that is run on the alternate (large) stack: it receives one
// opaque pointer and returns a ULONG status that DoCallout hands back to its caller.
typedef ULONG (WINAPI * MY_EXPAND_STACK_CALLOUT) (PVOID Parameter);

class FIBER_DATA 
{
public:
    PVOID _PrevFiber, _MyFiber;
    MY_EXPAND_STACK_CALLOUT _pfn;
    PVOID _Parameter;
    ULONG _dwError;
    BOOL _bConvertToThread;

    static VOID CALLBACK _FiberProc( PVOID lpParameter)
    {
        reinterpret_cast<FIBER_DATA*>(lpParameter)->FiberProc();
    }

    VOID FiberProc()
    {
        for (;;)
        {
            _dwError = _pfn(_Parameter);
            SwitchToFiber(_PrevFiber);
        }
    }

public:

    ~FIBER_DATA()
    {
        if (_MyFiber)
        {
            DeleteFiber(_MyFiber);
        }

        if (_bConvertToThread)
        {
            ConvertFiberToThread();
        }
    }

    FIBER_DATA()
    {
        _bConvertToThread = FALSE, _MyFiber = 0;
    }

    ULONG Create(SIZE_T dwStackCommitSize, SIZE_T dwStackReserveSize);

    ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
    {
        _PrevFiber = GetCurrentFiber();
        _pfn = pfn;
        _Parameter = Parameter;
        SwitchToFiber(_MyFiber);
        return _dwError;
    }
};

// Per-thread pointer to this thread's FIBER_DATA; assigned by OnAttach(), read by
// DoCallout() and OnDetach().
__declspec(thread) FIBER_DATA* g_pData;

// Makes sure the calling thread is a fiber and creates the worker fiber.
//
// dwStackCommitSize / dwStackReserveSize are forwarded unchanged to CreateFiberEx.
// Returns NOERROR on success, otherwise the GetLastError() code of the call that failed.
ULONG FIBER_DATA::Create(SIZE_T dwStackCommitSize, SIZE_T dwStackReserveSize)
{
    if (!ConvertThreadToFiber(this))
    {
        const ULONG convertError = GetLastError();

        // Somebody else already converted this thread: fine, but then it is not
        // ours to convert back, so _bConvertToThread stays FALSE.
        if (convertError != ERROR_ALREADY_FIBER)
        {
            return convertError;
        }
    }
    else
    {
        _bConvertToThread = TRUE;
    }

    _MyFiber = CreateFiberEx(dwStackCommitSize, dwStackReserveSize, 0, _FiberProc, this);

    if (_MyFiber)
    {
        return NOERROR;
    }

    return GetLastError();
}

// Releases the calling thread's FIBER_DATA (intended for DLL_THREAD_DETACH).
//
// The thread-local pointer is cleared before the object is destroyed so that a
// later DoCallout() on this thread sees "no data" instead of a dangling pointer,
// and so that calling OnDetach() twice is harmless.
void OnDetach()
{
    FIBER_DATA* pData = g_pData;

    g_pData = 0;

    // delete on a null pointer is a no-op, so no separate check is needed.
    delete pData;
}

// Allocates and initialises the calling thread's FIBER_DATA (intended for
// DLL_THREAD_ATTACH) and publishes it in g_pData.
//
// Returns NOERROR on success, ERROR_NO_SYSTEM_RESOURCES if the allocation yields
// a null pointer, or the error reported by FIBER_DATA::Create.
ULONG OnAttach()
{
    FIBER_DATA* const fresh = new FIBER_DATA;

    if (!fresh)
    {
        return ERROR_NO_SYSTEM_RESOURCES;
    }

    // 2 pages committed up front, 512 pages reserved for the worker fiber's stack.
    const ULONG status = fresh->Create(2*PAGE_SIZE, 512 * PAGE_SIZE);

    if (status)
    {
        delete fresh;
        return status;
    }

    g_pData = fresh;
    return NOERROR;
}

// Sample callout: prints its argument through a %s format and reports success.
// The demo passes string literals as param.
ULONG WINAPI TestCallout(PVOID param)
{
    PVOID const text = param;

    DbgPrint("TestCallout(%s)\n", text);

    return NOERROR;
}

// Runs pfn(Parameter) on the calling thread's worker fiber.
//
// Returns the value produced by pfn, or ERROR_GEN_FAILURE when the thread has no
// FIBER_DATA (g_pData is null).
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
    FIBER_DATA* const current = g_pData;

    if (!current)
    {
        return ERROR_GEN_FAILURE;
    }

    return current->DoCallout(pfn, Parameter);
}

// Demo driver: set up the per-thread data, run two callouts, then tear down.
// In a DLL the attach/detach calls belong in the matching DllMain notifications.
if (OnAttach() == NOERROR)//DLL_THREAD_ATTACH
{
    DoCallout(TestCallout, "Demo Task #1");

    DoCallout(TestCallout, "Demo Task #2");

    OnDetach();//DLL_THREAD_DETACH
}

Also note that all fibers execute in the context of a single thread - multiple fibers associated with a thread cannot execute concurrently, only sequentially, and you control the switch points yourself, so no additional synchronization is needed. SwitchToFiber is a pure user-mode procedure which executes very fast and never fails (because it never allocates any resources).


update


Although using __declspec(thread) FIBER_DATA* g_pData; is simpler (less code), it is better for the implementation to use TlsGetValue / TlsSetValue directly and to allocate FIBER_DATA on the first call inside a thread rather than for all threads. Also, __declspec(thread) did not work correctly (did not work at all) on XP for DLLs. So some modifications can be made:

at DLL_PROCESS_ATTACH allocate your TLS slot gTlsIndex = TlsAlloc();

and free it on DLL_PROCESS_DETACH

if (gTlsIndex != TLS_OUT_OF_INDEXES) TlsFree(gTlsIndex);

on every DLL_THREAD_DETACH notification call

void OnThreadDetach()
{
    if (FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex))
    {
        delete pData;
    }
}

and DoCallout need be modified in next way

// Runs pfn(Parameter) on the calling thread's worker fiber, creating the
// per-thread FIBER_DATA lazily on the first call made by that thread.
//
// Returns the value produced by pfn. If the per-thread data cannot be set up the
// callout is NOT run and a Win32 error code is returned instead:
// ERROR_NO_SYSTEM_RESOURCES, the error from FIBER_DATA::Create, or the error from
// TlsSetValue.
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
    FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex);

    if (!pData)
    {
        // this code executed only once on first call

        if (!(pData = new FIBER_DATA))
        {
            return ERROR_NO_SYSTEM_RESOURCES;
        }

        // Create takes (commit size, reserve size): commit a few pages up front and
        // reserve the large region. Adjust to the stack size you need.
        ULONG dwError = pData->Create(4*PAGE_SIZE, 512*PAGE_SIZE);

        // If the pointer cannot be stored, the next call would build another fiber
        // and this one would never be freed, so treat that as a failure too.
        // The error is captured before delete, whose cleanup may change it.
        if (dwError == NOERROR && !TlsSetValue(gTlsIndex, pData))
        {
            dwError = GetLastError();
        }

        if (dwError != NOERROR)
        {
            delete pData;
            return dwError;
        }
    }

    return pData->DoCallout(pfn, Parameter);
}

So instead of allocating a stack for every new thread on DLL_THREAD_ATTACH via OnAttach(), it is much better to allocate it only for the threads that really need it (on their first call).

This code can potentially have problems with fibers if someone else also tries to use fibers. For example, the MSDN example code does not check for ERROR_ALREADY_FIBER when ConvertThreadToFiber returns 0. So we can expect this case to be handled incorrectly by the main application if we create a fiber first and the application then also tries to use fibers after us. Also, ERROR_ALREADY_FIBER does not work on XP (it is available starting with Vista).

So another solution is possible: create the thread stack yourself and temporarily switch to it during a call that requires a large stack. The main point is not only to allocate space for the stack and swap esp (or rsp), but also not to forget to correctly set StackBase and StackLimit in NT_TIB - this is a necessary and sufficient condition (otherwise exceptions and guard-page extension will not work).

Although this alternative solution requires more code (manually creating the thread stack and switching stacks), it works on XP too and does not interfere when somebody else also tries to use fibers in the thread.

// Signature of a routine that is run on the alternate (large) stack.
typedef ULONG (WINAPI * MY_EXPAND_STACK_CALLOUT) (PVOID Parameter);

// Implemented in assembly (separate x86 and x64 versions). Makes `stack` the
// current stack pointer and returns the stack pointer that was current before
// the switch; `param` is left untouched in the first argument register.
extern "C" PVOID __fastcall SwitchToStack(PVOID param, PVOID stack);

// Per-thread state for the hand-made alternate stack (no Win32 fibers involved).
//
// Create() reserves the stack and prepares an initial frame so that the first
// SwitchToStack() lands in FiberProc. DoCallout() installs the alternate stack's
// bounds in the thread's NT_TIB, switches to it, and restores the caller's bounds
// once control comes back.
struct FIBER_DATA
{
    // _Stack      - base of the allocation; released with VirtualFree in the destructor
    // _StackLimit - value installed as NT_TIB::StackLimit while on the alternate stack
    // _StackPtr   - saved stack pointer of the alternate stack between callouts
    // _StackBase  - value installed as NT_TIB::StackBase while on the alternate stack
    PVOID _Stack, _StackLimit, _StackPtr, _StackBase;
    MY_EXPAND_STACK_CALLOUT _pfn; // routine requested by the current DoCallout()
    PVOID _Parameter;             // argument forwarded to _pfn
    ULONG _dwError;               // value returned by the last _pfn call

    // Code that runs on the alternate stack; never returns. `stack` is the
    // caller's stack pointer as returned by the switch that brought us here.
    static void __fastcall FiberProc(FIBER_DATA* pData, PVOID stack)
    {
        for (;;)
        {
            pData->_dwError = pData->_pfn(pData->_Parameter);

            // StackLimit can changed during _pfn call
            // (so remember it for the next DoCallout, which re-installs _StackLimit)
            pData->_StackLimit = ((PNT_TIB)NtCurrentTeb())->StackLimit;

            // Go back to the caller's stack; execution resumes here, with a fresh
            // caller stack pointer, when the next DoCallout switches in again.
            stack = SwitchToStack(0, stack);
        }
    }

    ULONG Create(SIZE_T Reserve, SIZE_T Commit);

    // Runs pfn(Parameter) on the alternate stack and returns what pfn returned.
    ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
    {
        _pfn = pfn;
        _Parameter = Parameter;

        PNT_TIB tib = (PNT_TIB)NtCurrentTeb();

        // Remember the caller's stack bounds so they can be put back afterwards.
        PVOID StackBase = tib->StackBase, StackLimit = tib->StackLimit;

        // The TIB must describe the stack we are about to run on.
        tib->StackBase = _StackBase, tib->StackLimit = _StackLimit;

        // Returns once FiberProc switches back; the value is the alternate
        // stack's saved stack pointer, kept for the next callout.
        _StackPtr = SwitchToStack(this, _StackPtr);

        tib->StackBase = StackBase, tib->StackLimit = StackLimit;

        return _dwError;
    }

    // Releases the whole stack allocation, if Create() succeeded.
    ~FIBER_DATA()
    {
        if (_Stack)
        {
            VirtualFree(_Stack, 0, MEM_RELEASE);
        }
    }

    // Only _Stack is initialised: it is the one field the destructor inspects.
    FIBER_DATA()
    {
        _Stack = 0;
    }
};

// Allocates the alternate stack and prepares its initial frame.
//
// Reserve - total address range to reserve for the stack, rounded up to a page.
// Commit  - part at the top of that range to commit immediately, rounded up to a page.
//
// A guard page is placed directly below the committed part unless the whole
// range is committed. Returns NOERROR on success, ERROR_INVALID_PARAMETER when
// Reserve is zero or smaller than Commit, otherwise the error code of the
// VirtualAlloc call that failed.
ULONG FIBER_DATA::Create(SIZE_T Reserve, SIZE_T Commit)
{
    Reserve = (Reserve + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
    Commit = (Commit + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);

    if (Reserve < Commit || !Reserve)
    {
        return ERROR_INVALID_PARAMETER;
    }

    PBYTE newStack = (PBYTE)VirtualAlloc(0, Reserve, MEM_RESERVE, PAGE_NOACCESS);

    if (!newStack)
    {
        return GetLastError();
    }

    // Same address viewed as a byte pointer (for arithmetic) and as a slot
    // pointer (for building the initial frame).
    union {
        PBYTE newStackBase;
        void** ppvStack;
    };

    // The stack grows downwards, so its base is the END of the reservation.
    newStackBase = newStack + Reserve;

    PBYTE newStackLimit = newStackBase - Commit;

    if ((newStackLimit = (PBYTE)VirtualAlloc(newStackLimit, Commit, MEM_COMMIT, PAGE_READWRITE)) != 0)
    {
        if (Reserve == Commit || VirtualAlloc(newStackLimit - PAGE_SIZE, PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE|PAGE_GUARD))
        {
            _StackBase = newStackBase, _StackLimit = newStackLimit, _Stack = newStack;

            // Build the frame that the first SwitchToStack() will pop: space for the
            // saved non-volatile registers topped by FiberProc as the return address.
#if defined(_M_IX86) 
            *--ppvStack = FiberProc;
            ppvStack -= 4;// ebp,esi,edi,ebx
#elif defined(_M_AMD64)
            ppvStack -= 5;// x64 space
            *--ppvStack = FiberProc;
            ppvStack -= 8;// r15,r14,r13,r12,rbp,rsi,rdi,rbx
#else
#error "not supported"
#endif

            _StackPtr = ppvStack;

            return NOERROR;
        }
    }

    // Capture the failure reason BEFORE cleaning up: VirtualFree may change the
    // thread's last-error value, and a stale 0 would be reported as success.
    ULONG dwError = GetLastError();

    VirtualFree(newStack, 0, MEM_RELEASE);

    return dwError;
}

// TLS slot holding each thread's FIBER_DATA*. Starts out as TLS_OUT_OF_INDEXES so
// that, until TlsAlloc() has run, it never aliases slot 0 (which may be a valid
// slot owned by other code).
ULONG gTlsIndex = TLS_OUT_OF_INDEXES;

// Runs pfn(Parameter) on the calling thread's alternate stack, creating the
// per-thread FIBER_DATA lazily on the first call made by that thread.
//
// Returns the value produced by pfn. If the per-thread data cannot be set up the
// callout is NOT run and a Win32 error code is returned instead:
// ERROR_NO_SYSTEM_RESOURCES, the error from FIBER_DATA::Create, or the error from
// TlsSetValue.
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
    FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex);

    if (!pData)
    {
        // this code executed only once on first call

        if (!(pData = new FIBER_DATA))
        {
            return ERROR_NO_SYSTEM_RESOURCES;
        }

        // Create takes (reserve size, commit size).
        ULONG dwError = pData->Create(512*PAGE_SIZE, 4*PAGE_SIZE);

        // If the pointer cannot be stored, the next call would allocate another
        // stack and this one would never be freed, so treat that as a failure too.
        // The error is captured before delete, whose cleanup may change it.
        if (dwError == NOERROR && !TlsSetValue(gTlsIndex, pData))
        {
            dwError = GetLastError();
        }

        if (dwError != NOERROR)
        {
            delete pData;
            return dwError;
        }
    }

    return pData->DoCallout(pfn, Parameter);
}

void OnThreadDetach()
{
    if (FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex))
    {
        delete pData;
    }
}

and assembly code for SwitchToStack : on x86

; PVOID __fastcall SwitchToStack(PVOID param /*ecx*/, PVOID stack /*edx*/)  -- x86
; Saves ebx/edi/esi/ebp on the current stack, makes edx the stack pointer, then
; restores the same four registers from the new stack and returns to the address
; found there. eax receives the stack pointer that was current before the switch.
; ecx is not touched.
@SwitchToStack@8 proc
    push    ebx
    push    edi
    push    esi
    push    ebp
    xchg    esp,edx         ; esp = new stack, edx = old stack pointer
    mov     eax,edx         ; return value: old stack pointer
    pop     ebp             ; popped in reverse order of the pushes above
    pop     esi
    pop     edi
    pop     ebx
    ret                     ; return address is taken from the NEW stack
@SwitchToStack@8 endp

and for x64:

; PVOID SwitchToStack(PVOID param /*rcx*/, PVOID stack /*rdx*/)  -- x64
; Saves rbx/rdi/rsi/rbp/r12-r15 on the current stack, makes rdx the stack pointer,
; then restores the same eight registers from the new stack and returns to the
; address found there. rax receives the stack pointer that was current before the
; switch. rcx is not touched.
SwitchToStack proc
    push    rbx
    push    rdi
    push    rsi
    push    rbp
    push    r12
    push    r13
    push    r14
    push    r15
    xchg    rsp,rdx         ; rsp = new stack, rdx = old stack pointer
    mov     rax,rdx         ; return value: old stack pointer
    pop     r15             ; popped in reverse order of the pushes above
    pop     r14
    pop     r13
    pop     r12
    pop     rbp
    pop     rsi
    pop     rdi
    pop     rbx
    ret                     ; return address is taken from the NEW stack
SwitchToStack endp

usage/test can be next:

// Demo driver for the hand-made stack version. In a DLL each marked step belongs
// in the matching DllMain notification.
gTlsIndex = TlsAlloc();//DLL_PROCESS_ATTACH

if (gTlsIndex != TLS_OUT_OF_INDEXES)
{
    // Dump the regions of the thread's own stack for comparison.
    TestStackMemory();

    DoCallout(TestCallout, "test #1");

    //play with stack, exceptions, guard pages
    // The buffer lives on the ORIGINAL stack; the callout fills it in while
    // running on the alternate stack.
    PSTR str = (PSTR)alloca(256);
    DoCallout(zTestCallout, str);
    DbgPrint("str=%s\n", str);

    DoCallout(TestCallout, "test #2");

    OnThreadDetach();//DLL_THREAD_DETACH

    TlsFree(gTlsIndex);//DLL_PROCESS_DETACH
}

// Prints every region of the allocation that starts at AllocationBase: address
// range, size in pages, state and protection. Stops at the first address that
// VirtualQuery cannot describe or that belongs to a different allocation.
void TestMemory(PVOID AllocationBase)
{
    MEMORY_BASIC_INFORMATION region;

    for (PVOID cursor = AllocationBase;;)
    {
        if (VirtualQuery(cursor, &region, sizeof(region)) < sizeof(region))
        {
            break;
        }

        if (region.AllocationBase != AllocationBase)
        {
            break;
        }

        // Advance first: the end of this region is also what gets printed.
        cursor = (PBYTE)region.BaseAddress + region.RegionSize;

        DbgPrint("[%p, %p) %p %08x %08x\n", region.BaseAddress, cursor, (PVOID)(region.RegionSize >> PAGE_SHIFT), region.State, region.Protect);
    }
}

void TestStackMemory()
{
    MEMORY_BASIC_INFORMATION mbi;
    if (VirtualQuery(_AddressOfReturnAddress(), &mbi, sizeof(mbi)) >= sizeof(mbi))
    {
        TestMemory(mbi.AllocationBase);
    }
}

// Stress-test callout: dumps the current stack's regions, takes 5 more pages of
// stack with alloca and dumps again, provokes and handles an access violation,
// and finally writes a string into the caller-supplied buffer.
//
// Parameter - destination for the result string; the demo caller passes a
//             256-byte buffer.
// Returns NOERROR.
ULONG WINAPI zTestCallout(PVOID Parameter)
{
    TestStackMemory();

    // Only the stack consumption matters here; the returned pointer is unused.
    alloca(5*PAGE_SIZE);

    TestStackMemory();

    __try
    {
        // Deliberate write to address 0 to raise an exception.
        *(int*)0=0;
    } 
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        DbgPrint("exception %x handled\n", GetExceptionCode());
    }

    strcpy((PSTR)Parameter, "zTestCallout demo");

    return NOERROR;
}

// Sample callout: dumps the regions of the stack it is running on, prints its
// argument through a %s format and reports success.
ULONG WINAPI TestCallout(PVOID param)
{
    PVOID const text = param;

    TestStackMemory();

    DbgPrint("TestCallout(%s)\n", text);

    return NOERROR;
}
like image 67
RbMm Avatar answered Oct 29 '22 04:10

RbMm


The maximum stack size is determined when the thread is created. It cannot be modified after that time.

like image 32
David Heffernan Avatar answered Oct 29 '22 02:10

David Heffernan