Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Python : multiprocessing and Array of c_char_p

I'm launching 3 processes and I want them to put a string into a shared array, at the index corresponding to the process (i).

Look at the code below, the output generated is:

['test 0', None, None]
['test 1', 'test 1', None]
['test 2', 'test 2', 'test 2']

Why 'test 0' get overwritten by test 1, and test 1 by test 2?

What I want is (order is not important) :

['test 0', None, None]
['test 0', 'test 1', None]
['test 0', 'test 1', 'test 2']

The code :

#!/usr/bin/env python

import multiprocessing
from multiprocessing import Value, Lock, Process, Array
import ctypes
from ctypes import c_int, c_char_p

class Consumer(multiprocessing.Process):
    def __init__(self, task_queue, result_queue, arr, lock):
            multiprocessing.Process.__init__(self)
            self.task_queue = task_queue
            self.result_queue = result_queue
            self.arr = arr
            self.lock = lock

    def run(self):
            proc_name = self.name
            while True:
                next_task = self.task_queue.get()
                if next_task is None:
                    self.task_queue.task_done()
                    break            
                answer = next_task(arr=self.arr, lock=self.lock)
                self.task_queue.task_done()
                self.result_queue.put(answer)
            return

class Task(object):
    def __init__(self, i):
        self.i = i

    def __call__(self, arr=None, lock=None):
        with lock:
            arr[self.i] = "test %d" % self.i
            print arr[:]

    def __str__(self):
        return 'ARC'

    def run(self):
        print 'IN'

if __name__ == '__main__':
   tasks = multiprocessing.JoinableQueue()
   results = multiprocessing.Queue()

   arr = Array(ctypes.c_char_p, 3)

   lock = multiprocessing.Lock()

   num_consumers = multiprocessing.cpu_count() * 2
   consumers = [Consumer(tasks, results, arr, lock) for i in xrange(num_consumers)]

   for w in consumers:
      w.start()

   for i in xrange(3):
      tasks.put(Task(i))

   for i in xrange(num_consumers):
      tasks.put(None)

I'm running Python 2.7.3 (Ubuntu)

like image 892
Ujoux Avatar asked Jan 08 '13 14:01

Ujoux


1 Answers

This problem seems similar to this one. There, J.F. Sebastian speculated that the assignment to arr[i] points arr[i] to a memory address that was only meaningful to the subprocess making the assignment. The other subprocesses retrieve garbage when looking at that address.

There are at least two ways to avoid this problem. One is to use a multiprocessing.manager list:

import multiprocessing as mp

class Consumer(mp.Process):
    def __init__(self, task_queue, result_queue, lock, lst):
            mp.Process.__init__(self)
            self.task_queue = task_queue
            self.result_queue = result_queue
            self.lock = lock
            self.lst = lst

    def run(self):
            proc_name = self.name
            while True:
                next_task = self.task_queue.get()
                if next_task is None:
                    self.task_queue.task_done()
                    break            
                answer = next_task(lock = self.lock, lst = self.lst)
                self.task_queue.task_done()
                self.result_queue.put(answer)
            return

class Task(object):
    def __init__(self, i):
        self.i = i

    def __call__(self, lock, lst):
        with lock:
            lst[self.i] = "test {}".format(self.i)
            print([lst[i] for i in range(3)])

if __name__ == '__main__':
   tasks = mp.JoinableQueue()
   results = mp.Queue()
   manager = mp.Manager()
   lst = manager.list(['']*3)

   lock = mp.Lock()
   num_consumers = mp.cpu_count() * 2
   consumers = [Consumer(tasks, results, lock, lst) for i in xrange(num_consumers)]

   for w in consumers:
      w.start()

   for i in xrange(3):
      tasks.put(Task(i))

   for i in xrange(num_consumers):
      tasks.put(None)

   tasks.join()

Another way is to use a shared array with a fixed size such as mp.Array('c', 10).

import multiprocessing as mp

class Consumer(mp.Process):
    def __init__(self, task_queue, result_queue, arr, lock):
            mp.Process.__init__(self)
            self.task_queue = task_queue
            self.result_queue = result_queue
            self.arr = arr
            self.lock = lock

    def run(self):
            proc_name = self.name
            while True:
                next_task = self.task_queue.get()
                if next_task is None:
                    self.task_queue.task_done()
                    break            
                answer = next_task(arr = self.arr, lock = self.lock)
                self.task_queue.task_done()
                self.result_queue.put(answer)
            return

class Task(object):
    def __init__(self, i):
        self.i = i

    def __call__(self, arr, lock):
        with lock:
            arr[self.i].value = "test {}".format(self.i)
            print([a.value for a in arr])

if __name__ == '__main__':
   tasks = mp.JoinableQueue()
   results = mp.Queue()
   arr = [mp.Array('c', 10) for i in range(3)]

   lock = mp.Lock()
   num_consumers = mp.cpu_count() * 2
   consumers = [Consumer(tasks, results, arr, lock) for i in xrange(num_consumers)]

   for w in consumers:
      w.start()

   for i in xrange(3):
      tasks.put(Task(i))

   for i in xrange(num_consumers):
      tasks.put(None)

   tasks.join()

I speculate that the reason why this works when mp.Array(ctypes.c_char_p, 3) does not, is because mp.Array('c', 10) has a fixed size so the memory address never changes, while mp.Array(ctypes.c_char_p, 3) has a variable size, so the memory address might change when arr[i] is assigned to a bigger string.

Perhaps this is what the docs are warning about when it states,

Although it is possible to store a pointer in shared memory remember that this will refer to a location in the address space of a specific process. However, the pointer is quite likely to be invalid in the context of a second process and trying to dereference the pointer from the second process may cause a crash.

like image 162
unutbu Avatar answered Nov 13 '22 10:11

unutbu