I wish to create an sklearn GMM object with a predefined set of means, weights, and covariances (laid out on a grid).
I managed to do it:
from functools import reduce  # reduce is no longer a builtin in Python 3
from sklearn.mixture import GaussianMixture
import numpy as np

def get_grid_gmm(subdivisions=[10, 10, 10], variance=0.05):
    # one component per grid cell
    n_gaussians = reduce(lambda x, y: x * y, subdivisions)
    # half a cell width per axis, so the grid is centred inside [0, 1]^3
    step = [1.0 / (2 * subdivisions[0]),
            1.0 / (2 * subdivisions[1]),
            1.0 / (2 * subdivisions[2])]
    means = np.mgrid[step[0]:1.0 - step[0]:complex(0, subdivisions[0]),
                     step[1]:1.0 - step[1]:complex(0, subdivisions[1]),
                     step[2]:1.0 - step[2]:complex(0, subdivisions[2])]
    # mgrid returns shape (3, n0, n1, n2); means_ expects one grid point per row
    means = means.reshape(3, -1).T
    covariances = variance * np.ones_like(means)
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)
    gmm = GaussianMixture(n_components=n_gaussians, covariance_type='spherical')
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means
    return gmm

def main():
    xx = np.random.rand(100, 3)
    gmm = get_grid_gmm()
    y = gmm.predict_proba(xx)  # this is the call that fails

if __name__ == "__main__":
    main()
The problem is that the gmm.predict_proba() call I need later on fails, because attributes that fitting would normally set are missing.
How can I overcome this?
UPDATE: I updated the code to be a complete example that shows the error.
UPDATE 2: I updated the code according to the comments and answers.
from functools import reduce  # reduce is no longer a builtin in Python 3
from sklearn.mixture import GaussianMixture
import numpy as np

def get_grid_gmm(subdivisions=[10, 10, 10], variance=0.05):
    # one component per grid cell
    n_gaussians = reduce(lambda x, y: x * y, subdivisions)
    # half a cell width per axis, so the grid is centred inside [0, 1]^3
    step = [1.0 / (2 * subdivisions[0]),
            1.0 / (2 * subdivisions[1]),
            1.0 / (2 * subdivisions[2])]
    means = np.mgrid[step[0]:1.0 - step[0]:complex(0, subdivisions[0]),
                     step[1]:1.0 - step[1]:complex(0, subdivisions[1]),
                     step[2]:1.0 - step[2]:complex(0, subdivisions[2])]
    # mgrid returns shape (3, n0, n1, n2); means_ expects one grid point per row
    means = means.reshape(3, -1).T
    # spherical covariances: one variance per component
    covariances = variance * np.ones(n_gaussians)
    cov_type = 'spherical'
    weights = (1.0 / n_gaussians) * np.ones(n_gaussians)

    gmm = GaussianMixture(n_components=n_gaussians, covariance_type=cov_type)
    gmm.weights_ = weights
    gmm.covariances_ = covariances
    gmm.means_ = means

    # predict_proba needs precisions_cholesky_, which fit() would normally compute
    # (in newer scikit-learn releases the module is sklearn.mixture._gaussian_mixture)
    from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky
    gmm.precisions_cholesky_ = _compute_precision_cholesky(covariances, cov_type)
    gmm.precisions_ = gmm.precisions_cholesky_ ** 2
    return gmm

def main():
    xx = np.random.rand(100, 3)
    gmm = get_grid_gmm()
    y = np.exp(gmm._estimate_log_prob(xx))  # per-component densities, shape (100, n_components)

if __name__ == "__main__":
    main()
No more errors, but _estimate_log_prob and predict_proba do not produce the same result for a fitted GMM. Why could that be?
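(Note: as far as I can tell, the gap is expected: _estimate_log_prob returns only the per-component log densities, while predict_proba also adds the log mixture weights and normalizes across components. A minimal sketch of the relationship, using scipy's logsumexp and the gmm and xx from above:

from scipy.special import logsumexp
import numpy as np

log_prob = gmm._estimate_log_prob(xx)                              # per-component log densities
weighted = log_prob + np.log(gmm.weights_)                         # add log mixture weights
log_resp = weighted - logsumexp(weighted, axis=1, keepdims=True)   # normalize per sample
resp = np.exp(log_resp)                                            # should match gmm.predict_proba(xx)
)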
Since you don't train the model but just use it for estimation, you don't need the full object; you can call the same function it uses under the hood. You could try _estimate_log_gaussian_prob; that is what they do internally, I think.
Have a look at the source:
in particular at the base class https://github.com/scikit-learn/scikit-learn/blob/ab93d657eb4268ac20c4db01c48065b5a1bfe80d/sklearn/mixture/base.py#L342
which calls the covariance-type-specific method, which in turn calls the function https://github.com/scikit-learn/scikit-learn/blob/ab93d657eb4268ac20c4db01c48065b5a1bfe80d/sklearn/mixture/gaussian_mixture.py#L671
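To make that concrete, here is a minimal sketch of calling the function directly, assuming the get_grid_gmm from the question and an older scikit-learn where the sklearn.mixture.gaussian_mixture import path still exists (newer releases moved it to sklearn.mixture._gaussian_mixture):

from sklearn.mixture.gaussian_mixture import _estimate_log_gaussian_prob
import numpy as np

xx = np.random.rand(100, 3)
gmm = get_grid_gmm()

# the same quantity gmm._estimate_log_prob(xx) computes: per-component log densities
log_prob = _estimate_log_gaussian_prob(xx, gmm.means_, gmm.precisions_cholesky_, 'spherical')
densities = np.exp(log_prob)  # shape (n_samples, n_components), unweighted and unnormalized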