Is it possible in WinAPI to set stack size for the current thread at runtime like setrlimit
does on Linux?
I mean to increase the reserved stack size for the current thread if it is too small for the current requirements.
This is in a library that may be called by threads from other programming languages, so it's not an option to set stack size at compile time.
If not, any ideas about a solution like an assembly trampoline that changes the stack pointer to a dynamically allocated memory block?
FAQ: Proxy thread is a surefire solution (unless the caller thread has extremely small stack). However, thread switching seems a performance killer. I need substantial amount of stack for recursion or for _alloca
. This is also for performance, because heap allocation is slow, especially if multiple threads allocate from heap in parallel (they get blocked by the same libc
/CRT
mutex, so the code becomes serial).
you can not full swap stack in current thread (allocate self, delete old) in library code because in old stack - return addresses, may be pointers to variables in stack, etc.
and you can not expand stack (virtual memory for it already allocated (reserved/commit) and not expandable.
however possible allocate temporary stack and switch to this stack during call. you must in this case save old StackBase
and StackLimit
from NT_TIB
(look this structure in winnt.h
), set new values (you need allocate memory for new stack), do call (for switch stack you need some assembly code - you can not do this only on c/c++) and return original StackBase
and StackLimit
. in kernelmode exist support for this - KeExpandKernelStackAndCallout
however in user mode exist Fibers - this is very rare used, but look like perfectly match to task. with Fiber we can create additional stack/execution context inside current thread.
so in general solution is next (for library):
on DLL_THREAD_ATTACH
:
ConvertThreadToFiber
) (if it return false
check also
GetLastError
for ERROR_ALREADY_FIBER
- this is also ok code)CreateFiberEx
we do this only once. than, every time when your procedure is called, which require large stack space:
GetCurrentFiber
SwitchToFiber
GetCurrentFiber
)
again by SwitchToFiber
and finally on DLL_THREAD_DETACH
you need:
DeleteFiber
ConvertFiberToThread
but only
in case initial ConvertThreadToFiber
return true
(if was
ERROR_ALREADY_FIBER
- let who first convert thread to fiber convert
it back - this is not your task in this case)you need some (usual small) data associated with your fiber / thread. this must be of course per thread variable. so you need use __declspec(thread)
for declare this data. or direct use TLS
(or which modern c++ features exist for this)
demo implementation is next:
typedef ULONG (WINAPI * MY_EXPAND_STACK_CALLOUT) (PVOID Parameter);
class FIBER_DATA
{
public:
PVOID _PrevFiber, _MyFiber;
MY_EXPAND_STACK_CALLOUT _pfn;
PVOID _Parameter;
ULONG _dwError;
BOOL _bConvertToThread;
static VOID CALLBACK _FiberProc( PVOID lpParameter)
{
reinterpret_cast<FIBER_DATA*>(lpParameter)->FiberProc();
}
VOID FiberProc()
{
for (;;)
{
_dwError = _pfn(_Parameter);
SwitchToFiber(_PrevFiber);
}
}
public:
~FIBER_DATA()
{
if (_MyFiber)
{
DeleteFiber(_MyFiber);
}
if (_bConvertToThread)
{
ConvertFiberToThread();
}
}
FIBER_DATA()
{
_bConvertToThread = FALSE, _MyFiber = 0;
}
ULONG Create(SIZE_T dwStackCommitSize, SIZE_T dwStackReserveSize);
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
_PrevFiber = GetCurrentFiber();
_pfn = pfn;
_Parameter = Parameter;
SwitchToFiber(_MyFiber);
return _dwError;
}
};
__declspec(thread) FIBER_DATA* g_pData;
ULONG FIBER_DATA::Create(SIZE_T dwStackCommitSize, SIZE_T dwStackReserveSize)
{
if (ConvertThreadToFiber(this))
{
_bConvertToThread = TRUE;
}
else
{
ULONG dwError = GetLastError();
if (dwError != ERROR_ALREADY_FIBER)
{
return dwError;
}
}
return (_MyFiber = CreateFiberEx(dwStackCommitSize, dwStackReserveSize, 0, _FiberProc, this)) ? NOERROR : GetLastError();
}
void OnDetach()
{
if (FIBER_DATA* pData = g_pData)
{
delete pData;
}
}
ULONG OnAttach()
{
if (FIBER_DATA* pData = new FIBER_DATA)
{
if (ULONG dwError = pData->Create(2*PAGE_SIZE, 512 * PAGE_SIZE))
{
delete pData;
return dwError;
}
g_pData = pData;
return NOERROR;
}
return ERROR_NO_SYSTEM_RESOURCES;
}
ULONG WINAPI TestCallout(PVOID param)
{
DbgPrint("TestCallout(%s)\n", param);
return NOERROR;
}
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
if (FIBER_DATA* pData = g_pData)
{
return pData->DoCallout(pfn, Parameter);
}
return ERROR_GEN_FAILURE;
}
if (!OnAttach())//DLL_THREAD_ATTACH
{
DoCallout(TestCallout, "Demo Task #1");
DoCallout(TestCallout, "Demo Task #2");
OnDetach();//DLL_THREAD_DETACH
}
also note that all fibers executed in single thread context - multiple fibers associated with thread can not execute in concurrent - only sequential, and you yourself control switch time. so not need any additional synchronization. and SwitchToFiber
- this is complete user mode proc. which executed very fast, never fail (because never allocate any resources)
update
despite use __declspec(thread) FIBER_DATA* g_pData;
more simply (less code), better for implementation direct use TlsGetValue
/ TlsSetValue
and allocate FIBER_DATA
on first call inside thread, but not for all threads. also __declspec(thread)
not correct worked (not worked at all) in XP for dll. so some modification can be
at DLL_PROCESS_ATTACH
allocate your TLS slot gTlsIndex = TlsAlloc();
and free it on DLL_PROCESS_DETACH
if (gTlsIndex != TLS_OUT_OF_INDEXES) TlsFree(gTlsIndex);
on every DLL_THREAD_DETACH
notification call
void OnThreadDetach()
{
if (FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex))
{
delete pData;
}
}
and DoCallout
need be modified in next way
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex);
if (!pData)
{
// this code executed only once on first call
if (!(pData = new FIBER_DATA))
{
return ERROR_NO_SYSTEM_RESOURCES;
}
if (ULONG dwError = pData->Create(512*PAGE_SIZE, 4*PAGE_SIZE))// or what stack size you need
{
delete pData;
return dwError;
}
TlsSetValue(gTlsIndex, pData);
}
return pData->DoCallout(pfn, Parameter);
}
so instead allocate stack for every new thread on DLL_THREAD_ATTACH
via OnAttach()
much better alocate it only for threads when really need (at first call)
and this code can potential have problems with fibers, if someone else also try use fibers. say in msdn example code not check for ERROR_ALREADY_FIBER
in case ConvertThreadToFiber
return 0. so we can wait that this case will be incorrect handled by main application if we before it decide create fiber and it also try use fiber after us. also ERROR_ALREADY_FIBER
not worked in xp (begin from vista).
so possible and another solution - yourself create thread stack, and temporary switch to it doring call which require large stack space. main need not only allocate space for stack and swap esp (or rsp) but not forget correct establish StackBase
and StackLimit
in NT_TIB
- it is necessary and sufficient condition (otherwise exceptions and guard page extension will be not worked).
despite this alternate solution require more code (manually create thread stack and stack switch) it will be work on xp too and nothing affect in situation when somebody else also try using fibers in thread
typedef ULONG (WINAPI * MY_EXPAND_STACK_CALLOUT) (PVOID Parameter);
extern "C" PVOID __fastcall SwitchToStack(PVOID param, PVOID stack);
struct FIBER_DATA
{
PVOID _Stack, _StackLimit, _StackPtr, _StackBase;
MY_EXPAND_STACK_CALLOUT _pfn;
PVOID _Parameter;
ULONG _dwError;
static void __fastcall FiberProc(FIBER_DATA* pData, PVOID stack)
{
for (;;)
{
pData->_dwError = pData->_pfn(pData->_Parameter);
// StackLimit can changed during _pfn call
pData->_StackLimit = ((PNT_TIB)NtCurrentTeb())->StackLimit;
stack = SwitchToStack(0, stack);
}
}
ULONG Create(SIZE_T Reserve, SIZE_T Commit);
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
_pfn = pfn;
_Parameter = Parameter;
PNT_TIB tib = (PNT_TIB)NtCurrentTeb();
PVOID StackBase = tib->StackBase, StackLimit = tib->StackLimit;
tib->StackBase = _StackBase, tib->StackLimit = _StackLimit;
_StackPtr = SwitchToStack(this, _StackPtr);
tib->StackBase = StackBase, tib->StackLimit = StackLimit;
return _dwError;
}
~FIBER_DATA()
{
if (_Stack)
{
VirtualFree(_Stack, 0, MEM_RELEASE);
}
}
FIBER_DATA()
{
_Stack = 0;
}
};
ULONG FIBER_DATA::Create(SIZE_T Reserve, SIZE_T Commit)
{
Reserve = (Reserve + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
Commit = (Commit + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
if (Reserve < Commit || !Reserve)
{
return ERROR_INVALID_PARAMETER;
}
if (PBYTE newStack = (PBYTE)VirtualAlloc(0, Reserve, MEM_RESERVE, PAGE_NOACCESS))
{
union {
PBYTE newStackBase;
void** ppvStack;
};
newStackBase = newStack + Reserve;
PBYTE newStackLimit = newStackBase - Commit;
if (newStackLimit = (PBYTE)VirtualAlloc(newStackLimit, Commit, MEM_COMMIT, PAGE_READWRITE))
{
if (Reserve == Commit || VirtualAlloc(newStackLimit - PAGE_SIZE, PAGE_SIZE, MEM_COMMIT, PAGE_READWRITE|PAGE_GUARD))
{
_StackBase = newStackBase, _StackLimit = newStackLimit, _Stack = newStack;
#if defined(_M_IX86)
*--ppvStack = FiberProc;
ppvStack -= 4;// ebp,esi,edi,ebx
#elif defined(_M_AMD64)
ppvStack -= 5;// x64 space
*--ppvStack = FiberProc;
ppvStack -= 8;// r15,r14,r13,r12,rbp,rsi,rdi,rbx
#else
#error "not supported"
#endif
_StackPtr = ppvStack;
return NOERROR;
}
}
VirtualFree(newStack, 0, MEM_RELEASE);
}
return GetLastError();
}
ULONG gTlsIndex;
ULONG DoCallout(MY_EXPAND_STACK_CALLOUT pfn, PVOID Parameter)
{
FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex);
if (!pData)
{
// this code executed only once on first call
if (!(pData = new FIBER_DATA))
{
return ERROR_NO_SYSTEM_RESOURCES;
}
if (ULONG dwError = pData->Create(512*PAGE_SIZE, 4*PAGE_SIZE))
{
delete pData;
return dwError;
}
TlsSetValue(gTlsIndex, pData);
}
return pData->DoCallout(pfn, Parameter);
}
void OnThreadDetach()
{
if (FIBER_DATA* pData = (FIBER_DATA*)TlsGetValue(gTlsIndex))
{
delete pData;
}
}
and assembly code for SwitchToStack
: on x86
@SwitchToStack@8 proc
push ebx
push edi
push esi
push ebp
xchg esp,edx
mov eax,edx
pop ebp
pop esi
pop edi
pop ebx
ret
@SwitchToStack@8 endp
and for x64:
SwitchToStack proc
push rbx
push rdi
push rsi
push rbp
push r12
push r13
push r14
push r15
xchg rsp,rdx
mov rax,rdx
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rsi
pop rdi
pop rbx
ret
SwitchToStack endp
usage/test can be next:
gTlsIndex = TlsAlloc();//DLL_PROCESS_ATTACH
if (gTlsIndex != TLS_OUT_OF_INDEXES)
{
TestStackMemory();
DoCallout(TestCallout, "test #1");
//play with stack, excepions, guard pages
PSTR str = (PSTR)alloca(256);
DoCallout(zTestCallout, str);
DbgPrint("str=%s\n", str);
DoCallout(TestCallout, "test #2");
OnThreadDetach();//DLL_THREAD_DETACH
TlsFree(gTlsIndex);//DLL_PROCESS_DETACH
}
void TestMemory(PVOID AllocationBase)
{
MEMORY_BASIC_INFORMATION mbi;
PVOID BaseAddress = AllocationBase;
while (VirtualQuery(BaseAddress, &mbi, sizeof(mbi)) >= sizeof(mbi) && mbi.AllocationBase == AllocationBase)
{
BaseAddress = (PBYTE)mbi.BaseAddress + mbi.RegionSize;
DbgPrint("[%p, %p) %p %08x %08x\n", mbi.BaseAddress, BaseAddress, (PVOID)(mbi.RegionSize >> PAGE_SHIFT), mbi.State, mbi.Protect);
}
}
void TestStackMemory()
{
MEMORY_BASIC_INFORMATION mbi;
if (VirtualQuery(_AddressOfReturnAddress(), &mbi, sizeof(mbi)) >= sizeof(mbi))
{
TestMemory(mbi.AllocationBase);
}
}
ULONG WINAPI zTestCallout(PVOID Parameter)
{
TestStackMemory();
alloca(5*PAGE_SIZE);
TestStackMemory();
__try
{
*(int*)0=0;
}
__except(EXCEPTION_EXECUTE_HANDLER)
{
DbgPrint("exception %x handled\n", GetExceptionCode());
}
strcpy((PSTR)Parameter, "zTestCallout demo");
return NOERROR;
}
ULONG WINAPI TestCallout(PVOID param)
{
TestStackMemory();
DbgPrint("TestCallout(%s)\n", param);
return NOERROR;
}
The maximum stack size is determined when the thread is created. It cannot be modified after that time.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With