Hi, I would like an int example and a float example in which writing to a non-atomic value causes tearing. I can't seem to reproduce this. Either it is extremely rare or I'm doing something wrong.
Here is my test code which never prints. Is there anything wrong with it?
#include <windows.h>
#include <tchar.h>
#include <strsafe.h>
#define MAX_THREADS 64
#define BUF_SIZE 255
// Thread entry point handed to CreateThread for every worker; defined later in this file.
DWORD WINAPI MyThreadFunction( LPVOID lpParam );
// Shows a message box describing the last Win32 error, labelled with the
// name of the function that failed; defined later in this file.
void ErrorHandler(LPTSTR lpszFunction);
// Example payload type for handing data to a worker thread through the
// single LPVOID argument of the thread function. Any type reachable through
// one void pointer would do. Note that this program passes NULL as the
// thread argument, so the type is declared here but not used.
struct MyData {
    int val1;
    int val2;
};
using MYDATA = MyData;    // value alias
using PMYDATA = MyData*;  // pointer alias
int _tmain()
{
    DWORD   dwThreadIdArray[MAX_THREADS];
    HANDLE  hThreadArray[MAX_THREADS]; 
    // Create MAX_THREADS worker threads.
    for( int i=0; i<MAX_THREADS; i++ )
    {
        // Allocate memory for thread data.
        // Create the thread to begin execution on its own.
        hThreadArray[i] = CreateThread( 
            NULL,                   // default security attributes
            0,                      // use default stack size  
            MyThreadFunction,       // thread function name
            NULL,                   // argument to thread function 
            0,                      // use default creation flags 
            &dwThreadIdArray[i]);   // returns the thread identifier 
        // Check the return value for success.
        // If CreateThread fails, terminate execution. 
        // This will automatically clean up threads and memory. 
        if (hThreadArray[i] == NULL) 
        {
            ErrorHandler(TEXT("CreateThread"));
            ExitProcess(3);
        }
    } // End of main thread creation loop.
    // Wait until all threads have terminated.
    WaitForMultipleObjects(MAX_THREADS, hThreadArray, TRUE, INFINITE);
    // Close all thread handles and free memory allocations.
    for(int i=0; i<MAX_THREADS; i++)
    {
        CloseHandle(hThreadArray[i]);
    }
    return 0;
}
#pragma pack(push, 1)
// Byte-packed test structure. With 1-byte packing the three leading chars
// put `test` at offset 3, so the int is deliberately misaligned; the char
// members exist only as padding around it.
struct Test
{
    char x1;
    char x2; // distinct name: declaring `x1` twice does not compile
    char x3;
    int test; // the value the worker threads hammer on
    char x4;
    char x5;
};
// Single shared instance written and read by every worker thread.
// `new Test()` value-initializes it, so `test` starts at 0.
Test* t = new Test(); //This is test code don't care about allocation or that it is a global.
#pragma pack(pop)
// Worker thread body: announces itself on the console, then repeatedly
// writes three known values to the shared, misaligned `t->test` and reads
// it back, printing a message whenever the value read is none of the three
// (i.e. a torn value made of pieces of different writes).
//
// lpParam is unused (the creator passes NULL).
// Returns 1 if no console output handle is available, otherwise 0.
DWORD WINAPI MyThreadFunction( LPVOID lpParam ) 
{ 
    (void)lpParam;

    TCHAR msgBuf[BUF_SIZE];
    size_t cchStringSize;
    DWORD dwChars;

    // Make sure there is a console to receive output results. 
    HANDLE hStdout = GetStdHandle(STD_OUTPUT_HANDLE);
    if( hStdout == INVALID_HANDLE_VALUE )
        return 1;

    // All workers bump this counter concurrently, so the increment must be
    // atomic; a plain `++` on a static int could lose updates.
    static volatile LONG startedThreads = 0;
    const int ordinal = static_cast<int>(InterlockedIncrement(&startedThreads));
    StringCchPrintf(msgBuf, BUF_SIZE, TEXT("Starting thread, %d\n"), ordinal); 
    StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
    WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars, NULL);

    // Access the shared object through a volatile view. Without it the
    // compiler may collapse the three stores and the load into a single
    // store of the last constant and remove the tearing check entirely,
    // which would make the test meaningless in optimized builds.
    // NOTE(review): whether a torn value can actually be observed still
    // depends on the hardware and on where the allocation falls relative
    // to a cache-line boundary.
    volatile Test* const shared = t;

    shared->test = 1;
    for (int i=0; i<1000000000;++i)
    {
        shared->test = 1;
        shared->test = 10000;
        shared->test = 10000000;
        const int result = shared->test;
        if(result != 1 && result != 10000 && result != 10000000)
        {
            StringCchPrintf(msgBuf, BUF_SIZE, TEXT("Tearing occured = %d\n"), result); 
            StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
            WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars, NULL);
        }
    }
    return 0; 
} 
// Reports the calling thread's last Win32 error in a message box.
//
// lpszFunction: name of the API call that failed; used as the message prefix.
//
// The error code is captured first, before any other API call can overwrite
// it. If the system message text cannot be retrieved, the box still shows the
// function name and the numeric code; if the display buffer cannot be
// allocated, the box shows just the function name.
void ErrorHandler(LPTSTR lpszFunction) 
{ 
    const DWORD dw = GetLastError(); 

    // Retrieve the system error message for the last-error code.
    // Start from NULL so a failed FormatMessage never leaves a garbage
    // pointer to be measured, printed or freed below.
    LPTSTR lpMsgBuf = NULL;
    const DWORD cchMsg = FormatMessage(
        FORMAT_MESSAGE_ALLOCATE_BUFFER | 
        FORMAT_MESSAGE_FROM_SYSTEM |
        FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL,
        dw,
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        reinterpret_cast<LPTSTR>(&lpMsgBuf),
        0, NULL );
    LPCTSTR lpMsgText = (cchMsg != 0 && lpMsgBuf != NULL) ? lpMsgBuf : TEXT("");

    // Room for both strings plus the fixed text and the formatted error code.
    const size_t cchDisplay = static_cast<size_t>(lstrlen(lpMsgText))
                            + static_cast<size_t>(lstrlen(lpszFunction)) + 40;
    LPTSTR lpDisplayBuf = static_cast<LPTSTR>(
        LocalAlloc(LMEM_ZEROINIT, cchDisplay * sizeof(TCHAR)));

    // Display the error message.
    if (lpDisplayBuf != NULL)
    {
        // dw is a DWORD (unsigned long), hence %lu.
        StringCchPrintf(lpDisplayBuf, 
            cchDisplay,
            TEXT("%s failed with error %lu: %s"), 
            lpszFunction, dw, lpMsgText); 
        MessageBox(NULL, lpDisplayBuf, TEXT("Error"), MB_OK); 
    }
    else
    {
        // Out of memory: still tell the user which call failed.
        MessageBox(NULL, lpszFunction, TEXT("Error"), MB_OK); 
    }

    // Free error-handling buffer allocations (LocalFree accepts NULL).
    LocalFree(lpMsgBuf);
    LocalFree(lpDisplayBuf);
}
I can trigger torn reads/writes with the test code below, which forces the contended uint32_t to straddle a cache-line boundary. It was compiled with Visual Studio 2013, and the tearing only seems to happen in Release builds:
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>
using namespace std;
// Stop flag shared by all writer threads; set once a torn value has been reported.
atomic<bool> gDone{false};
// One easily recognisable bit pattern per writer thread, indexed by thread id.
vector<uint32_t> vals{0x11111111, 0x22222222, 0x33333333, 0x44444444};
// Serialises the diagnostic output written to cout.
mutex ioMutex;
// Writer loop run by each thread. Until the global stop flag is set, it
// stores this thread's pattern (vals[tid]) through pVal, reads the location
// back, and checks that the value read is one of the known patterns. A value
// that matches none of them is reported on cout (under ioMutex) and the stop
// flag is raised so that all writers wind down.
//
// pVal: location shared by all writers; volatile so that every store and
//       load is actually performed.
// tid:  index into vals selecting the pattern this thread writes.
void writeVal(volatile uint32_t* pVal, int tid) {
    for (;;) {
        if (gDone) {
            return;
        }

        *pVal = vals[tid];
        const uint32_t observed = *pVal;

        const bool isKnownPattern = find(begin(vals), end(vals), observed) != end(vals);
        if (isKnownPattern) {
            continue;
        }

        // Neither our pattern nor another thread's: the value is a mix of two writes.
        unique_lock<mutex> guard(ioMutex);
        cout << "Detected torn read/write! pVal = 0x" << setbase(16) << setw(8) << setfill('0')
             << reinterpret_cast<uintptr_t>(pVal) << " currentVal = 0x" << observed << endl;
        gDone = true;
    }
}
// Picks a 4-byte location inside a scratch buffer that straddles a cache-line
// boundary (two bytes on each side), then starts one writer thread per entry
// in vals, all hammering that location, and waits for them to finish.
int main() {
    vector<char> memVec(16 * 1024);
    char* first = &memVec[0];

    // Assumed cache-line size in bytes; adjust for the target CPU if it differs.
    const uintptr_t cacheLineSize = 64;
    const uintptr_t firstAddr = reinterpret_cast<uintptr_t>(first);

    // Smallest cache-line boundary that is at least two bytes past the start
    // of the buffer. Rounding up from first + 2 guarantees that boundary - 2
    // never falls before the buffer, whatever the buffer's own alignment
    // (rounding first + cacheLineSize down and subtracting 2 lands one byte
    // before the buffer when the buffer address is 63 modulo 64).
    const uintptr_t boundary = (firstAddr + 2 + (cacheLineSize - 1)) & ~(cacheLineSize - 1);
    char* crossesCacheLine = first + (boundary - 2 - firstAddr);

    // Intentionally misaligned: the low two bytes sit at the end of one
    // cache line and the high two bytes at the start of the next.
    uint32_t* tearableUint32 = reinterpret_cast<uint32_t*>(crossesCacheLine);

    vector<thread> threads(vals.size());
    for (size_t i = 0; i != threads.size(); ++i) {
        const int tid = static_cast<int>(i);
        threads[i] = thread([=] { writeVal(tearableUint32, tid); });
    }
    for (auto& t : threads) {
        t.join();
    }
}
Output:
Detected torn read/write! pVal = 0x004bc43e currentVal = 0x11112222
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With