Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to make a numpy array field in django?

I want a numpy array field in django so that I can do something like this

from example.models import Series
import numpy as np
# Desired round trip: store a numpy array on the model, reload the row,
# and get an equal array back from the field.
array = np.array([1, 2, 3])
model = Series.objects.create(id=1, array=array)
# Re-fetch so the comparison uses the value loaded through the ORM,
# not the in-memory object that was just created.
model = Series.objects.get(id=1)
assert np.array_equal(array, model.array)

Essentially, the field should serialize the numpy array to binary and deserialize it automatically. Currently, I'm just doing this:

import base64
import numpy as np
from django.db import models

class Series(models.Model):
    """Model that keeps a float32 numpy array base64-encoded in a binary column.

    Use ``set_array`` / ``get_array`` instead of touching ``array`` directly;
    the column itself only ever holds the encoded bytes.
    """
    id = models.IntegerField(primary_key=True, unique=True)
    array = models.BinaryField()

    def get_array(self):
        """Return the stored payload decoded as a flat float32 numpy array."""
        return np.frombuffer(base64.decodebytes(self.array), dtype=np.float32)

    def set_array(self, array):
        """Encode ``array`` as float32 and store it in the binary column.

        ``array`` may be any array-like; it is converted to a contiguous
        float32 buffer first so the stored bytes always match the dtype
        that ``get_array`` decodes with.
        """
        payload = np.ascontiguousarray(array, dtype=np.float32)
        self.array = base64.b64encode(payload)

I'd prefer it if this were a reusable field because I have many models that will need to store a numpy array. For example:

class Series(models.Model):
    """Sketch of the desired API: the array is declared as a reusable field."""
    # The element dtype is passed once, at field declaration time.
    array = NumpyArrayField(dtype=np.float32)

So, how can I write a NumpyArrayField class that accomplishes this?

I tried doing the following (copying the source code for BinaryField)

import base64

import numpy as np
from django.db import models

class NumpyArrayField(models.Field):
    """Model field that stores a numpy array in a binary column.

    Values are coerced to ``dtype``, base64-encoded and written as binary
    data; loading reverses the process. Only the flat buffer is stored, so
    arrays come back one-dimensional, and because they are built with
    ``np.frombuffer`` they are read-only views over the decoded bytes.
    """
    empty_values = [None]

    def __init__(self, dtype, *args, **kwargs):
        """Create the field.

        Args:
            dtype: numpy dtype used both to encode and to decode the array.
        """
        self.dtype = dtype
        super(NumpyArrayField, self).__init__(*args, **kwargs)

    def deconstruct(self):
        """Include ``dtype`` so migrations can rebuild the field."""
        name, path, args, kwargs = super(NumpyArrayField, self).deconstruct()
        kwargs['dtype'] = self.dtype
        return name, path, args, kwargs

    def get_internal_type(self):
        """Reuse the backend's column type for ``BinaryField``."""
        # Django looks this name up in the backend's column-type mapping.
        # An unknown name ('NumpyArrayField') yields no column type, so the
        # column is silently left out of CREATE TABLE, which produces the
        # "table ... has no column named array" error.
        # NOTE: tables created before this fix still lack the column and
        # have to be re-created or altered by a new migration.
        return 'BinaryField'

    def get_placeholder(self, value, compiler, connection):
        return connection.ops.binary_placeholder_sql(value)

    def get_default(self):
        """Return the field default, mapping the empty-string default to b''."""
        if self.has_default() and not callable(self.default):
            return self.default
        default = super(NumpyArrayField, self).get_default()
        # Guard with isinstance: comparing a numpy array to '' is ambiguous
        # in a boolean context.
        if isinstance(default, str) and default == '':
            return b''
        return default

    def _encode(self, value):
        """Return the base64 bytes for ``value`` (raw bytes or array-like)."""
        if isinstance(value, (bytes, bytearray, memoryview)):
            # Raw buffers (e.g. the b'' default) are encoded as they are.
            return base64.b64encode(value)
        # Coerce so the stored bytes always match the dtype used on load;
        # b64encode also needs a contiguous buffer.
        return base64.b64encode(np.ascontiguousarray(value, dtype=self.dtype))

    def _decode(self, value):
        """Turn a stored payload into a numpy array; pass through None/arrays."""
        if value is None or isinstance(value, np.ndarray):
            return value
        if isinstance(value, str):
            # Serializers hand back the ASCII text made by value_to_string.
            value = value.encode('ascii')
        return np.frombuffer(base64.decodebytes(value), dtype=self.dtype)

    def get_db_prep_value(self, value, connection, prepared=False):
        """Encode the array for storage; ``None`` is passed through for NULL."""
        value = super(NumpyArrayField, self).get_db_prep_value(value, connection, prepared)
        # Check for None before encoding: b64encode(None) raises TypeError.
        if value is None:
            return None
        return connection.Database.Binary(self._encode(value))

    def from_db_value(self, value, expression, connection, *args):
        """Decode the column value whenever a row is loaded from the database.

        ``*args`` absorbs the extra ``context`` argument that older Django
        versions pass.
        """
        return self._decode(value)

    def value_to_string(self, obj):
        """Serialize the field value of model instance ``obj`` as ASCII text."""
        # ``obj`` is the model instance, not the array itself.
        return self._encode(self.value_from_object(obj)).decode('ascii')

    def to_python(self, value):
        """Convert deserialized or cleaned input into a numpy array."""
        return self._decode(value)


class Series(models.Model):
    """Example model with an integer primary key and an int32 array field."""
    # NOTE(review): primary_key=True should already imply uniqueness in
    # Django, so unique=True looks redundant - confirm before removing.
    id = models.IntegerField(primary_key=True, unique=True)
    array = NumpyArrayField(dtype=np.int32)

And the migrations ran fine, but I got a django.db.utils.OperationalError: table example_series has no column named array error.

like image 354
michaelsnowden Avatar asked Oct 11 '17 23:10

michaelsnowden


1 Answer

I saved a numpy array in a Django model backed by MySQL. Here's how:

  1. Declare the field as a Django BinaryField

from django.db import models

# Binary column for the encoded array; presumably declared in the body of a
# Django model class, since it is later accessed as model.np_field.
np_field = models.BinaryField()

  2. Convert the numpy array to Python bytes with pickle.dumps, then encode the bytes with base64

# NOTE: this snippet uses `pickle` and `base64`; their imports are not shown.
# Serialize the array to bytes with pickle.
np_bytes = pickle.dumps(np_array)

# Base64-encode the pickled bytes.
np_base64 = base64.b64encode(np_bytes)

# Assign the encoded payload to the model's binary field.
model.np_field = np_base64

  3. Read the numpy array back from the Django model

# Reverse the storage steps: base64-decode the field value first.
np_bytes = base64.b64decode(model.np_field)

# NOTE(review): pickle.loads can execute arbitrary code while unpickling;
# only use it on payloads this application wrote itself.
np_array = pickle.loads(np_bytes)

like image 159
Kawa Yeung Avatar answered Oct 23 '22 04:10

Kawa Yeung