Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Userland interrupt timer access such as via KeQueryInterruptTime (or similar)

Is there a "Nt" or similar (i.e. non-kernelmode-driver) function equivalent for KeQueryInterruptTime or anything similar? There seems to be no such thing as NtQueryInterruptTime, at least I've not found it.

What I want is some kind of reasonably accurate and reliable, monotonic timer (thus not QPC) which is reasonably efficient and doesn't have surprises such as an overflowing 32-bit counter, with no unnecessary "smartness", no time zones, and no complicated structures.

So ideally, I want something like timeGetTime with a 64 bit value. It doesn't even have to be the same timer.
There exists GetTickCount64 starting with Vista, which would be acceptable as such, but I'd not like to break XP support only for such a stupid reason.

Reading the quadword at 0x7FFE0008 as indicated here ... well, works ... and it proves that indeed the actual internal counter is 64 bits under XP (it's also as fast as it could possibly get), but meh... let's not talk about what kind of a nasty hack it is to read some unknown, hardcoded memory location.

There must certainly be something in between calling an artificially stupefied (scaling a 64 bit counter down to 32 bits) high-level API function and reading a raw memory address?

like image 362
Damon Avatar asked Oct 20 '25 04:10

Damon


1 Answers

Here's an example of a thread-safe wrapper for GetTickCount() extending the tick count value to 64 bits and in that being equivalent to GetTickCount64().

To avoid missed counter rollovers, make sure to call this function at least a few times within every 49.7-day period. You can even have a dedicated thread whose only purpose would be to call this function and then sleep some 20 days in an infinite loop.

ULONGLONG MyGetTickCount64(void)
{
  static volatile LONGLONG Count = 0;
  LONGLONG curCount1, curCount2;
  LONGLONG tmp;

  curCount1 = InterlockedCompareExchange64(&Count, 0, 0);

  curCount2 = curCount1 & 0xFFFFFFFF00000000;
  curCount2 |= GetTickCount();

  if ((ULONG)curCount2 < (ULONG)curCount1)
  {
    curCount2 += 0x100000000;
  }

  tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);

  if (tmp == curCount1)
  {
    return curCount2;
  }
  else
  {
    return tmp;
  }
}

EDIT: And here's a complete application that tests MyGetTickCount64().

// Compiled with Open Watcom C 1.9: wcl386.exe /we /wx /q gettick.c

#include <windows.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>

//
// Hand-assembled 32-bit x86 machine code standing in for
// InterlockedCompareExchange64(), which is apparently missing in
// Open Watcom C 1.9. It must work with MSVC++ too, however.
//
// The bytes form a routine taking three pointer arguments on the stack
// ([ebp + 0x8], [ebp + 0xc], [ebp + 0x10]). It loads the 64-bit value
// pointed to by the third argument into edx:eax and the one pointed to by
// the second argument into ecx:ebx, then executes "lock cmpxchg8b" on the
// location pointed to by the first argument. edi, ecx and ebx are saved on
// entry and restored before returning.
//
UINT8 Cmpxchg8bData[] =
{
  0x55,             // push      ebp
  0x89, 0xE5,       // mov       ebp, esp
  0x57,             // push      edi
  0x51,             // push      ecx
  0x53,             // push      ebx
  0x8B, 0x7D, 0x10, // mov       edi, [ebp + 0x10]
  0x8B, 0x07,       // mov       eax, [edi]
  0x8B, 0x57, 0x04, // mov       edx, [edi + 0x4]
  0x8B, 0x7D, 0x0C, // mov       edi, [ebp + 0xc]
  0x8B, 0x1F,       // mov       ebx, [edi]
  0x8B, 0x4F, 0x04, // mov       ecx, [edi + 0x4]
  0x8B, 0x7D, 0x08, // mov       edi, [ebp + 0x8]
  0xF0,             // lock:
  0x0F, 0xC7, 0x0F, // cmpxchg8b [edi]
  0x5B,             // pop       ebx
  0x59,             // pop       ecx
  0x5F,             // pop       edi
  0x5D,             // pop       ebp
  0xC3              // ret
};

// Function pointer through which the bytes of Cmpxchg8bData are invoked as a
// __cdecl routine. The parameter order matches the stack slots the machine
// code reads: Dest at [ebp + 0x8], Exch at [ebp + 0xc], Comp at [ebp + 0x10].
// main() changes the protection of the array to PAGE_EXECUTE_READWRITE
// before any call is made through this pointer.
LONGLONG (__cdecl *Cmpxchg8b)(LONGLONG volatile* Dest, LONGLONG* Exch, LONGLONG* Comp) =
  (LONGLONG (__cdecl *)(LONGLONG volatile*, LONGLONG*, LONGLONG*))Cmpxchg8bData;

//
// By-value front end for the Cmpxchg8b code thunk.
//
// Destination - location operated on by the thunk's "lock cmpxchg8b".
// Exchange    - value handed to the thunk as the replacement.
// Comparand   - value handed to the thunk as the expected contents.
//
// Returns whatever the thunk returns. The thunk takes Exchange and
// Comparand by address, so the addresses of the local copies are passed.
//
LONGLONG MyInterlockedCompareExchange64(LONGLONG volatile* Destination,
                                        LONGLONG Exchange,
                                        LONGLONG Comparand)
{
  LONGLONG previous;

  previous = Cmpxchg8b(Destination, &Exchange, &Comparand);

  return previous;
}

// Route every later use of InterlockedCompareExchange64() in this file to
// MyInterlockedCompareExchange64(). Any existing macro of that name is
// removed first so the redefinition below does not clash with it.
#ifdef InterlockedCompareExchange64
#undef InterlockedCompareExchange64
#endif

#define InterlockedCompareExchange64(Destination, Exchange, Comparand) \
  MyInterlockedCompareExchange64(Destination, Exchange, Comparand)

//
// Thread-safe printf().
//
// Output produced by one thread must not get interleaved with output
// produced by another, so the vprintf() call is wrapped in a critical
// section. PrintfCriticalSection has to be initialized (main() does it)
// before ts_printf() is first called.
//
static CRITICAL_SECTION PrintfCriticalSection;

//
// Format, ... - same meaning as for printf().
// Returns the value returned by vprintf().
//
int ts_printf(const char* Format, ...)
{
  va_list args;
  int written;

  va_start(args, Format);

  EnterCriticalSection(&PrintfCriticalSection);
  written = vprintf(Format, args);
  LeaveCriticalSection(&PrintfCriticalSection);

  va_end(args);

  return written;
}

// Amount added to the simulated tick counter on each step of
// SimulatedTickThread(), which sleeps 10 ms between steps.
// 0x800000 is 2^23, so 512 steps wrap the low 32 bits of the counter.
#define TICK_COUNT_10MS_INCREMENT 0x800000

//
// This is the simulated tick counter.
// Its low 32 bits are going to be returned by
// our, simulated, GetTickCount().
//
// TICK_COUNT_10MS_INCREMENT is what the counter is
// incremented by every time. The value is so chosen
// that the counter quickly overflows in its
// low 32 bits.
//
static volatile LONGLONG SimulatedTickCount = 0;

//
// Simulated 32-bit GetTickCount().
//
// Returns SimulatedTickCount truncated to its low 32 bits, so the
// returned count wraps around often.
//
ULONG SimulatedGetTickCount(void)
{
  ULONG low32;

  low32 = (ULONG)SimulatedTickCount;

  return low32;
}

//
// This thread function will increment the simulated tick counter
// whose value's low 32 bits we'll be reading in SimulatedGetTickCount().
//
// lpParameter is unused. The function loops forever, adding
// TICK_COUNT_10MS_INCREMENT to SimulatedTickCount after every 10 ms sleep.
//
DWORD WINAPI SimulatedTickThread(LPVOID lpParameter)
{
  UNREFERENCED_PARAMETER(lpParameter);

  for (;;)
  {
    LONGLONG c;

    Sleep(10);

    // Get the counter value, add TICK_COUNT_10MS_INCREMENT to it and
    // store the result back. The store only takes effect if the counter
    // still holds the value that was read; otherwise read it again and
    // retry so that no increment is ever dropped.
    do
    {
      // Exchanging 0 for 0 is used as an atomic 64-bit read.
      c = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);
    } while (InterlockedCompareExchange64(&SimulatedTickCount, c + TICK_COUNT_10MS_INCREMENT, c) != c);
  }

  // Not reached; kept so the function formally returns a value.
  return 0;
}

// Statistics maintained by MyGetTickCount64() with InterlockedIncrement()
// and printed by PrintStats():
// - how many times a wrap-around of the 32-bit tick count was observed;
// - how many times publishing the new 64-bit count failed because the
//   internal counter had been changed in the meantime.
volatile LONG CountOfObserved32bitOverflows = 0;
volatile LONG CountOfObservedUpdateRaces = 0;

//
// This prints statistics that includes the true 64-bit value of
// SimulatedTickCount that we can't get from SimulatedGetTickCount() as it
// returns only its lower 32 bits.
//
// The stats also include:
// - the number of times that MyGetTickCount64() observes an overflow of
//   SimulatedGetTickCount()
// - the number of times MyGetTickCount64() fails to update its internal
//   counter because of a concurrent update in another thread.
//
// The ULONG and LONG arguments are printed with the "l" length modifier
// because both types are based on "long"; passing them for plain %X / %d
// would be a format/argument type mismatch.
//
void PrintStats(void)
{
  // Exchanging 0 for 0 is used as an atomic 64-bit read of the counter.
  LONGLONG true64bitCounter = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);

  ts_printf("  0x%08lX`%08lX <- true 64-bit count; ovfs: ~%ld; races: %ld\n",
            (ULONG)(true64bitCounter >> 32),
            (ULONG)true64bitCounter,
            CountOfObserved32bitOverflows,
            CountOfObservedUpdateRaces);
}

//
// This is our poor man's implementation of GetTickCount64()
// on top of GetTickCount().
//
// It's thread safe.
//
// When used with actual GetTickCount() instead of SimulatedGetTickCount()
// it must be called at least a few times in 49.7 days to ensure that
// it doesn't miss any overflows in GetTickCount()'s return value.
//
ULONGLONG MyGetTickCount64(void)
{
  static volatile LONGLONG Count = 0;
  LONGLONG curCount1, curCount2;
  LONGLONG tmp;

  curCount1 = InterlockedCompareExchange64(&Count, 0, 0);

  curCount2 = curCount1 & 0xFFFFFFFF00000000;
  curCount2 |= SimulatedGetTickCount();

  if ((ULONG)curCount2 < (ULONG)curCount1)
  {
    curCount2 += 0x100000000;

    InterlockedIncrement(&CountOfObserved32bitOverflows);
  }

  tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);

  if (tmp != curCount1)
  {
    curCount2 = tmp;

    InterlockedIncrement(&CountOfObservedUpdateRaces);
  }

  return curCount2;
}

//
// This is an error counter. If a thread that uses MyGetTickCount64() notices
// any problem with what MyGetTickCount64() returns, it bumps up this error
// counter (with InterlockedIncrement()) and stops. If one of the threads
// sees a non-zero value in this counter due to an error in another thread,
// it stops as well.
//
volatile LONG Error = 0;

//
// This is a thread function that will be using MyGetTickCount64(),
// validating its return value and printing some stats once in a while.
//
// This function is meant to execute concurrently in multiple threads
// to create race conditions inside of MyGetTickCount64() and test it.
//
// lpParameter carries the thread number (an integer cast to LPVOID).
//
// Returns 0 when the loop ends because another thread reported an error,
// or -1 after this thread itself detected and reported a problem.
//
// DWORD and ULONG arguments are printed with the "l" length modifier
// because both types are based on "unsigned long".
//
DWORD WINAPI TickUserThread(LPVOID lpParameter)
{
  DWORD user = (DWORD)lpParameter; // thread number
  // ticks[0] - previous sample, ticks[1] - latest sample,
  // ticks[2] - sample at the time of the last sleep,
  // ticks[3] - sample at the time of the last print.
  ULONGLONG ticks[4];

  ticks[3] = ticks[2] = ticks[1] = MyGetTickCount64();

  while (!Error)
  {
    ticks[0] = ticks[1];
    ticks[1] = MyGetTickCount64();

    // Every ~100 ms sleep a little (slightly lowers CPU load, to about 90%)
    if (ticks[1] > ticks[2] + TICK_COUNT_10MS_INCREMENT * 10L)
    {
      ticks[2] = ticks[1];
      Sleep(1 + rand() % 20);
    }

    // Every ~1000 ms print the last value from MyGetTickCount64().
    // Thread 1 also prints stats here.
    if (ticks[1] > ticks[3] + TICK_COUNT_10MS_INCREMENT * 100L)
    {
      ticks[3] = ticks[1];
      ts_printf("%lu:0x%08lX`%08lX\n", user, (ULONG)(ticks[1] >> 32), (ULONG)ticks[1]);

      if (user == 1)
      {
        PrintStats();
      }
    }

    if (ticks[0] > ticks[1])
    {
      // The count went backwards between two consecutive samples.
      ts_printf("%lu:Non-monotonic tick counts: 0x%016llX > 0x%016llX!\n",
                user,
                ticks[0],
                ticks[1]);
      PrintStats();
      InterlockedIncrement(&Error);
      return -1;
    }
    else if (ticks[0] + 0x100000000 <= ticks[1])
    {
      // The count advanced by a full 32-bit period or more in one step.
      ts_printf("%lu:Too big tick count jump: 0x%016llX -> 0x%016llX!\n",
                user,
                ticks[0],
                ticks[1]);
      PrintStats();
      InterlockedIncrement(&Error);
      return -1;
    }

    Sleep(0); // be nice, yield to other threads.
  }

  return 0;
}

//
// Console control handler registered by main().
//
// On Ctrl+C it prints the stats; for every event it returns FALSE.
// (Per the original note, the program terminates after Ctrl+C.)
//
BOOL WINAPI ConsoleEventHandler(DWORD Event)
{
  switch (Event)
  {
    case CTRL_C_EVENT:
      PrintStats();
      break;

    default:
      break;
  }

  return FALSE;
}

int main(void)
{
  HANDLE simulatedTickThreadHandle;
  HANDLE tickUserThreadHandle;
  DWORD dummy;

  // This is for the missing InterlockedCompareExchange64() workaround.
  VirtualProtect(Cmpxchg8bData, sizeof(Cmpxchg8bData), PAGE_EXECUTE_READWRITE, &dummy);

  InitializeCriticalSection(&PrintfCriticalSection);

  if (!SetConsoleCtrlHandler(&ConsoleEventHandler, TRUE))
  {
    ts_printf("SetConsoleCtrlHandler(&ConsoleEventHandler) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // Start the tick simulator thread.

  simulatedTickThreadHandle = CreateThread(NULL, 0, &SimulatedTickThread, NULL, 0, NULL);

  if (simulatedTickThreadHandle == NULL)
  {
    ts_printf("CreateThread(&SimulatedTickThread) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // Start one thread that'll be using MyGetTickCount64().

  tickUserThreadHandle = CreateThread(NULL, 0, &TickUserThread, (LPVOID)2, 0, NULL);
  if (tickUserThreadHandle == NULL)
  {
    ts_printf("CreateThread(&TickUserThread) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // The other thread using MyGetTickCount64() will be the main thread.

  TickUserThread((LPVOID)1);

  //
  // The app terminates upon any error condition detected in TickUserThread()
  // in any of the threads or by Ctrl+C.
  //

  return 0;
}

As a test I've been running this test app under Windows XP for 5+ hours on an otherwise idle machine that has 2 CPUs (idle, to avoid potential long starvation times and therefore avoid missing counter overflows that occur every 5 seconds) and it's still doing well.

Here's the latest output from the console:

2:0x00000E1B`C8800000
1:0x00000E1B`FA800000
  0x00000E1B`FA800000 <- true 64-bit count; ovfs: ~3824; races: 110858

As you can see, MyGetTickCount64() has observed 3824 32-bit overflows and failed to update the value of Count with its second InterlockedCompareExchange64() 110858 times. So, overflows indeed occur and the last number means that the variable is, in fact, being concurrently updated by the two threads.

You can also see that the 64-bit tick counts that the two threads receive from MyGetTickCount64() in TickUserThread() don't have anything missing in the top 32 bits and are pretty close to the actual 64-bit tick count in SimulatedTickCount, whose 32 low bits are returned by SimulatedGetTickCount(). 0x00000E1BC8800000 is visually behind 0x00000E1BFA800000 due to thread scheduling and infrequent stat prints, it's behind by exactly 100*TICK_COUNT_10MS_INCREMENT, or 1 second. Internally, of course, the difference is much smaller.

Now, on availability of InterlockedCompareExchange64()... It's a bit odd that it's officially available since Windows Vista and Windows Server 2003. Server 2003 is in fact built from the same code base as Windows XP.

But the most important thing here is that this function is built on top of the Pentium CMPXCHG8B instruction that's been available since 1998 or earlier (1), (2). And I can see this instruction in my Windows XP's (SP3) binaries. It's in ntkrnlpa.exe/ntoskrnl.exe (the kernel) and ntdll.dll (the DLL that exports kernel's NtXxxx() functions that everything's built upon). Look for a byte sequence of 0xF0, 0x0F, 0xC7 and disassemble the code around that place to see that these bytes aren't there coincidentally.

You can check availability of this instruction through the CPUID instruction (EDX bit 8 of CPUID function 0x00000001 and function 0x80000001) and refuse to run instead of crashing if the instruction isn't there, but these days you're unlikely to find a machine that doesn't support this instruction. If you do, it won't be a good machine for Windows XP and probably your application as well anyways.

like image 50
Alexey Frunze Avatar answered Oct 21 '25 22:10

Alexey Frunze