Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

pythonic way to index list of objects

I have a list of objects. Each object has two fields

obj1.status = 2
obj1.timestamp = 19211

obj2.status = 3
obj2.timestamp = 14211

obj_list = [obj1, obj2]

I will keep adding and deleting objects in the list and also changing attributes of the objects; for example, I may change obj1.status to 5.
Now I have two dicts

dict1 - <status, object>
dict2 - <timestamp, object> 

How do I design a simple solution so that whenever I modify/delete/insert elements in the list, the maps get automatically updated? I am interested in a Pythonic solution that is elegant and extensible. For example, in the future I should be able to easily add another attribute and a dict for it as well.

Also, for simplicity, let us assume all attribute values are different. For example, no two objects will have the same status.

like image 539
dark knight Avatar asked May 27 '16 17:05

dark knight


3 Answers

You could override the __setattr__ on the objects to update the indexes whenever you set the values. You can use a weakref dictionary for the indexes so that when you delete objects and are no longer using them, they are automatically removed from the indexes.

import weakref
from bunch import Bunch


class MyObject(object):
    """Object whose attribute assignments are mirrored into class-level indexes.

    ``MyObject.indexes`` maps every attribute name that has ever been set to
    a ``weakref.WeakValueDictionary`` of ``{attribute value: instance}``.
    Because the per-attribute dictionaries hold their values weakly, an
    instance drops out of the indexes once nothing else references it.

    Attribute values are used as dictionary keys, so they must be hashable.
    """

    indexes = Bunch()  # Could just use dict()

    def __init__(self, **kwargs):
        """Set every keyword argument as an (indexed) attribute."""
        super(MyObject, self).__init__()
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __setattr__(self, name, value):
        """Assign ``value`` to ``name`` and re-index this instance under it."""
        try:
            index = MyObject.indexes[name]
        except KeyError:
            # First time this attribute name is seen: create its index.
            index = weakref.WeakValueDictionary()
            MyObject.indexes[name] = index
        try:
            old_val = getattr(self, name)
        except AttributeError:
            # Attribute not set yet, so there is no stale entry to drop.
            pass
        else:
            # Only drop the old entry when it really points at this object;
            # another instance may have taken over the same key since.
            if index.get(old_val) is self:
                del index[old_val]
        object.__setattr__(self, name, value)
        index[value] = self


# Demo: build two objects, then look them up through the class-level indexes.
obj1 = MyObject(status=1, timestamp=123123)
obj2 = MyObject(status=2, timestamp=2343)


# print(...) with a single argument behaves the same on Python 2 and 3.
# The indexes live on the class, so they are reachable from the class or
# from any instance.
print(MyObject.indexes.status[1])
print(obj1.indexes.timestamp[2343])
obj1.status = 5
print(obj2.indexes['status'][5])

I used a Bunch here because it allows you to access the indexes using .name notation, but you could just use a dict instead and use the ['name'] syntax.

like image 165
Brendan Abel Avatar answered Oct 28 '22 10:10

Brendan Abel


One approach here would be to create class-level dicts for MyObj and define the updating behavior using the property decorator. Every time an object is changed or added, the change is reflected in the respective dictionaries associated with the class.

Edit: as @BrendanAbel points out, using weakref.WeakValueDictionary in place of dict handles object deletion from class level dicts.

from datetime import datetime
from weakref import WeakValueDictionary

# Default timestamp shared by every MyObj created without an explicit one;
# it is evaluated once, when this line runs.
DEFAULT_TIME = datetime.now()


class MyObj(object):
    """
    A sample clone of your object

    ``MyObj.statuses`` and ``MyObj.timestamps`` are class-level
    ``WeakValueDictionary`` indexes mapping an attribute value to the
    instance that currently holds it. Assigning ``status`` or ``timestamp``
    moves the instance within the matching index only. If two instances
    share a value, the most recent assignment owns the index entry.
    """
    timestamps = WeakValueDictionary()
    statuses = WeakValueDictionary()

    def __init__(self, status=0, timestamp=DEFAULT_TIME):
        # Seed the backing fields first so the setters below can look up the
        # "old" value, then go through the setters to register the instance.
        self._status = status
        self._timestamp = timestamp

        self.status = status
        self.timestamp = timestamp

    def _reindex(self, index, old_key, new_key):
        """Move this instance from ``old_key`` to ``new_key`` in ``index``.

        The old entry is removed only when it actually refers to this
        instance, so an entry owned by another object is never deleted.
        """
        if index.get(old_key) is self:
            del index[old_key]
        index[new_key] = self

    @property
    def status(self):
        return self._status

    @status.setter
    def status(self, val):
        # Touch only the status index; the timestamp index is unaffected.
        self._reindex(MyObj.statuses, self._status, val)
        self._status = val

    @property
    def timestamp(self):
        return self._timestamp

    @timestamp.setter
    def timestamp(self, val):
        # Touch only the timestamp index; the status index is unaffected.
        self._reindex(MyObj.timestamps, self._timestamp, val)
        self._timestamp = val

    def __repr__(self):
        return "MyObj: status={} timestamp={}".format(self.status, self.timestamp)


# Three sample objects with statuses 1, 2 and 3 (default timestamp), created
# in that order and collected in a list.
lst = [MyObj(status) for status in (1, 2, 3)]
obj1, obj2, obj3 = lst

# In [87]: q.lst
# Out[87]: 
# [MyObj: status=1 timestamp=2016-05-27 13:43:38.158363,
#  MyObj: status=2 timestamp=2016-05-27 13:43:38.158363,
#  MyObj: status=3 timestamp=2016-05-27 13:43:38.158363]

# In [88]: q.MyObj.statuses[1]
# Out[88]: MyObj: status=1 timestamp=2016-05-27 13:43:38.158363

# In [89]: q.MyObj.statuses[1].status = 42

# In [90]: q.MyObj.statuses[42]
# Out[90]: MyObj: status=42 timestamp=2016-05-27 13:43:38.158363

# In [91]: q.MyObj.statuses[1]
# ---------------------------------------------------------------------------
# KeyError                                  Traceback (most recent call last)
# <ipython-input-91-508ab072bfc4> in <module>()
# ----> 1 q.MyObj.statuses[1]

# KeyError: 1
like image 22
hilberts_drinking_problem Avatar answered Oct 28 '22 12:10

hilberts_drinking_problem


For a collection to be aware of mutation of its elements, there must be some connection between the elements and that collection which can communicate when changes happen. For this reason, we either must bind an instance to a collection or proxy the elements of the collection so that change-communication doesn't leak into the element's code.

A note about the implementation I'm going to present: the proxying method only works if the attributes are changed by setting them directly, not from inside a method. A more complex book-keeping system would be necessary for that.

Additionally, it assumes that exact duplicates of all attributes won't exist, given that you require the indices be built out of set objects instead of lists.

from collections import defaultdict

class Proxy(object):
    """Stand-in for an element of a collection that reports attribute changes.

    Attribute reads are forwarded to the wrapped object. Attribute writes are
    forwarded too, after which the owning collection is notified through
    ``collection.signal_change(obj, name, old, new)``.

    Only the bookkeeping attributes ``_proxy`` (the wrapped object) and
    ``_collection`` (the owner) live on the proxy itself.
    """

    def __init__(self, proxy, collection):
        self._proxy = proxy
        self._collection = collection

    def __getattribute__(self, name):
        if name in ("_proxy", "_collection"):
            return object.__getattribute__(self, name)
        return getattr(self._proxy, name)

    def __setattr__(self, name, value):
        # The tuple must list exactly the names stored on the proxy itself,
        # otherwise __init__ cannot store them.
        if name in ("_proxy", "_collection"):
            object.__setattr__(self, name, value)
            return
        proxied = self._proxy
        # Raises AttributeError if the wrapped object has no such attribute
        # yet; only existing attributes can be changed through the proxy.
        old = getattr(proxied, name)
        setattr(proxied, name, value)
        self._collection.signal_change(proxied, name, old, value)


class IndexedCollection(object):
    """List-like container that keeps per-attribute indexes of its items.

    ``indices[attribute_name][attribute_value]`` is the set of stored objects
    whose attribute currently has that value. Objects are kept in sets, so
    they must be hashable.

    Indexing or iterating the collection yields ``Proxy`` wrappers, so that
    attribute assignments made through them are reported back via
    ``signal_change``.
    """

    def __init__(self, items, index_names):
        self.items = list(items)
        self.index_names = set(index_names)
        self.indices = defaultdict(lambda: defaultdict(set))
        # Index the initial items too; otherwise they are invisible to
        # lookups and a later remove()/signal_change() fails on them.
        for obj in self.items:
            self._add_to_indices(obj)

    def __len__(self):
        return len(self.items)

    def __iter__(self):
        for obj in self.items:
            yield Proxy(obj, self)

    def remove(self, obj):
        """Remove ``obj`` from the collection and from every index."""
        self.items.remove(obj)
        self._remove_from_indices(obj)

    def __getitem__(self, i):
        # Ensure consumers get a proxy, not a raw object
        return Proxy(self.items[i], self)

    def append(self, obj):
        """Add ``obj`` to the collection and to every index."""
        self.items.append(obj)
        self._add_to_indices(obj)

    def _add_to_indices(self, obj):
        for indx in self.index_names:
            key = getattr(obj, indx)
            self.indices[indx][key].add(obj)

    def _remove_from_indices(self, obj):
        for indx in self.index_names:
            key = getattr(obj, indx)
            self.indices[indx][key].remove(obj)

    def signal_change(self, obj, indx, old, new):
        """Re-file ``obj`` after its attribute ``indx`` changed from ``old`` to ``new``."""
        if indx not in self.index_names:
            return
        # Tell the container to update its indices for a
        # particular attribute and object
        self.indices[indx][old].remove(obj)
        self.indices[indx][new].add(obj)
like image 21
mobiusklein Avatar answered Oct 28 '22 12:10

mobiusklein