Recently I came across strange memory usage while using copy.deepcopy.
I have the following code example:
import copy
import gc
import os
import psutil
from pympler.asizeof import asizeof
from humanize import filesize
class Foo(object):
    """Named container of child ``Foo`` objects and ``Bar`` objects.

    Children are kept in two dictionaries, ``foos`` and ``bars``, each keyed
    by the child's ``name``. ``__slots__`` is declared so instances carry no
    per-instance ``__dict__``.
    """

    __slots__ = ("name", "foos", "bars")

    def __init__(self, name):
        """Create an empty container called ``name``."""
        self.name = name
        self.foos = {}
        self.bars = {}

    def add_foo(self, foo):
        """Store ``foo`` under ``foo.name``, replacing any same-named entry."""
        self.foos[foo.name] = foo

    def add_bar(self, bar):
        """Store ``bar`` under ``bar.name``, replacing any same-named entry."""
        self.bars[bar.name] = bar

    def __getstate__(self):
        """Return a ``{slot name: value}`` dict describing this instance.

        Slots that were never assigned are left out instead of raising
        ``AttributeError``, so a partially initialised instance can still
        be serialised.
        """
        return {
            slot: getattr(self, slot)
            for slot in self.__slots__
            if hasattr(self, slot)
        }

    def __setstate__(self, state):
        """Restore attributes from a dict produced by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)
class Bar(object):
    """Leaf record holding a ``name`` and a ``description``.

    ``__slots__`` is declared so instances carry no per-instance
    ``__dict__``.
    """

    __slots__ = ("name", "description")

    def __init__(self, name, description):
        """Store ``name`` and ``description`` on the new instance."""
        self.name = name
        self.description = description

    def __getstate__(self):
        """Return a ``{slot name: value}`` dict describing this instance.

        Slots that were never assigned are left out instead of raising
        ``AttributeError``.
        """
        return {
            slot: getattr(self, slot)
            for slot in self.__slots__
            if hasattr(self, slot)
        }

    def __setstate__(self, state):
        """Restore attributes from a dict produced by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)
def get_ram():
    """Return the resident set size (RSS) of the current process.

    The value is the ``rss`` field of psutil's ``memory_info()`` result,
    which is the first field of that named tuple.
    """
    return psutil.Process(os.getpid()).memory_info().rss
def get_foo(bars_per_foo=5000):
    """Build the object graph used for the memory measurement.

    The result is a ``Foo`` named ``"Foo"`` that holds ``bars_per_foo``
    ``Bar`` objects plus one child ``Foo`` named ``"SubFoo1"``, which in
    turn holds another ``bars_per_foo`` ``Bar`` objects.

    Args:
        bars_per_foo: Number of ``Bar`` objects added to each ``Foo``.
            Defaults to 5000, the size used by the original benchmark.

    Returns:
        The top-level ``Foo``.
    """
    sub_foo = Foo("SubFoo1")
    for i in range(bars_per_foo):
        # Name and description are formatted by two separate calls on
        # purpose, exactly as the measured benchmark builds them.
        sub_foo.add_bar(
            Bar("BarInSubFoo{}".format(i), "BarInSubFoo{}".format(i))
        )

    foo = Foo("Foo")
    foo.add_foo(sub_foo)
    for i in range(bars_per_foo):
        foo.add_bar(Bar("BarInFoo{}".format(i), "BarInFoo{}".format(i)))
    return foo
def main():
    """Report process memory before and after ``copy.deepcopy`` of the graph.

    Prints the ``asizeof`` size of the original and of the copy, the
    process memory reading taken before and after the copy, and the
    difference between the two readings.
    """
    foo = get_foo()
    foo_size = asizeof(foo)

    # Run a collection before each reading so garbage that is already
    # collectable is not included in the measurement.
    gc.collect()
    ram1 = get_ram()

    foo_copy = copy.deepcopy(foo)

    gc.collect()
    ram2 = get_ram()

    foo_copy_size = asizeof(foo_copy)
    print(
        "Original object size: {}, Ram before: {}\n"
        "Copied object size: {}, Ram after: {}\n"
        "Diff in ram: {}".format(
            filesize.naturalsize(foo_size),
            filesize.naturalsize(ram1),
            filesize.naturalsize(foo_copy_size),
            filesize.naturalsize(ram2),
            filesize.naturalsize(ram2 - ram1),
        )
    )
# Run the measurement only when this file is executed as a script.
if __name__ == "__main__":
    main()
What I tried to do is measure the amount of memory used by the program before and after the call to copy.deepcopy. For this purpose, I created two classes.
I expected my memory usage to rise after the call to deepcopy in an amount equal to the size of the original object.
Strangely, I got these results:
Original object size: 2.1 MB, Ram before: 18.6 MB
Copied object size: 2.1 MB, Ram after: 24.7 MB
Diff in ram: 6.1 MB
As you can see, the difference in memory usage is approximately 300% of the size of the copied object.
** These results have been obtained using Python 3.8.5 on Windows 10 64 bit
What I tried?
Original object size: 2.3 MB, Ram before: 34.3 MB
Copied object size: 2.3 MB, Ram after: 46.2 MB
Diff in ram: 11.9 MB
- __getstate__: got better results, but far from what I was expecting.
- Foo object: also got better results, but also far from what I was expecting.
- pickle.dumps & pickle.loads in order to copy the object: produced the same results.
Any thoughts?
Some of that is probably accounted for because deepcopy keeps a cache of all the objects it has visited to avoid getting stuck in an infinite loop (the memo dictionary). For this sort of thing, you should probably write your own efficient copy function. deepcopy is written to be able to handle arbitrary inputs, not necessarily to be efficient.
If you want an efficient copying function, you can just write it yourself. This is sufficient for a deep copy, something to the effect of:
import copy
import gc
import os
import psutil
from pympler.asizeof import asizeof
from humanize import filesize
class Foo(object):
    """Named container of child ``Foo`` objects and ``Bar`` objects.

    Children are kept in two dictionaries, ``foos`` and ``bars``, each keyed
    by the child's ``name``. ``__slots__`` is declared so instances carry no
    per-instance ``__dict__``.
    """

    __slots__ = ("name", "foos", "bars")

    def __init__(self, name):
        """Create an empty container called ``name``."""
        self.name = name
        self.foos = {}
        self.bars = {}

    def add_foo(self, foo):
        """Store ``foo`` under ``foo.name``, replacing any same-named entry."""
        self.foos[foo.name] = foo

    def add_bar(self, bar):
        """Store ``bar`` under ``bar.name``, replacing any same-named entry."""
        self.bars[bar.name] = bar

    def copy(self):
        """Return a new ``Foo`` whose children are copies of this one's.

        Every value in ``foos`` and ``bars`` is duplicated by calling its
        own ``copy()`` method; ``name`` and the dictionary keys are reused
        as-is. No record of visited objects is kept, so a child reachable
        through two paths is copied once per path, and a graph containing
        a cycle would recurse until Python's recursion limit is hit.
        """
        new = Foo(self.name)
        new.foos = {key: child.copy() for key, child in self.foos.items()}
        new.bars = {key: child.copy() for key, child in self.bars.items()}
        return new
class Bar(object):
    """Leaf record holding a ``name`` and a ``description``.

    ``__slots__`` is declared so instances carry no per-instance
    ``__dict__``.
    """

    __slots__ = ("name", "description")

    def __init__(self, name, description):
        """Store ``name`` and ``description`` on the new instance."""
        self.name = name
        self.description = description

    def copy(self):
        """Return a new ``Bar`` carrying the same name and description."""
        return Bar(self.name, self.description)
def get_ram():
    """Return the resident set size (RSS) of the current process.

    The value is the ``rss`` field of psutil's ``memory_info()`` result,
    which is the first field of that named tuple.
    """
    return psutil.Process(os.getpid()).memory_info().rss
def get_foo(bars_per_foo=5000):
    """Build the object graph used for the memory measurement.

    The result is a ``Foo`` named ``"Foo"`` that holds ``bars_per_foo``
    ``Bar`` objects plus one child ``Foo`` named ``"SubFoo1"``, which in
    turn holds another ``bars_per_foo`` ``Bar`` objects.

    Args:
        bars_per_foo: Number of ``Bar`` objects added to each ``Foo``.
            Defaults to 5000, the size used by the original benchmark.

    Returns:
        The top-level ``Foo``.
    """
    sub_foo = Foo("SubFoo1")
    for i in range(bars_per_foo):
        # Name and description are formatted by two separate calls on
        # purpose, exactly as the measured benchmark builds them.
        sub_foo.add_bar(
            Bar("BarInSubFoo{}".format(i), "BarInSubFoo{}".format(i))
        )

    foo = Foo("Foo")
    foo.add_foo(sub_foo)
    for i in range(bars_per_foo):
        foo.add_bar(Bar("BarInFoo{}".format(i), "BarInFoo{}".format(i)))
    return foo
def main():
    """Report process memory before and after ``foo.copy()`` of the graph.

    Prints the ``asizeof`` size of the original and of the copy, the
    process memory reading taken before and after the copy, and the
    difference between the two readings.
    """
    foo = get_foo()
    foo_size = asizeof(foo)

    # Run a collection before each reading so garbage that is already
    # collectable is not included in the measurement.
    gc.collect()
    ram1 = get_ram()

    foo_copy = foo.copy()

    gc.collect()
    ram2 = get_ram()

    foo_copy_size = asizeof(foo_copy)
    print(
        "Original object size: {}, Ram before: {}\n"
        "Copied object size: {}, Ram after: {}\n"
        "Diff in ram: {}".format(
            filesize.naturalsize(foo_size),
            filesize.naturalsize(ram1),
            filesize.naturalsize(foo_copy_size),
            filesize.naturalsize(ram2),
            filesize.naturalsize(ram2 - ram1),
        )
    )
# Run the measurement only when this file is executed as a script.
if __name__ == "__main__":
    main()
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With