I'm very new to Python but I'm trying to produce a 2D Gaussian fit for some data. Specifically, stellar fluxes linked to certain positions in a coordinate system/grid. However not all of the positions in my grid have corresponding flux values. I don't really want to set these values to zero in case it biases my fit, but I can't seem to set them to nan
and still get my Gaussian fit to work. This is the code I'm using (modified slightly from here):
import numpy
import scipy
from numpy import *
from scipy import optimize
def gaussian(height, center_x, center_y, width_x, width_y):
width_x = float(width_x)
width_y = float(width_y)
return lambda x,y: height*exp(-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)
def moments(data):
total = nansum(data)
X, Y = indices(data.shape)
center_x = nansum(X*data)/total
center_y = nansum(Y*data)/total
row = data[int(center_x), :]
col = data[:, int(center_y)]
width_x = nansum(sqrt(abs((arange(col.size)-center_y)**2*col))/nansum(col))
width_y = nansum(sqrt(abs((arange(row.size)-center_x)**2*row))/nansum(row))
height = nanmax(data)
return height, center_x, center_y, width_x, width_y
def fitgaussian(data):
params = moments(data)
errorfunction = lambda p: ravel(gaussian(*p)(*indices(data.shape)) - data)
p, success = optimize.leastsq(errorfunction, params)
return p
parameters = fitgaussian(data)
fit = gaussian(*parameters)
My flux values are in a 2D array called data
. The code works if I have 0
instead of nan
values in this array, but otherwise my parameters
always come out as [nan nan nan nan nan]
. If there's a way to fix this, I would really appreciate your insight! The more detailed the explanation, the better. Thanks in advance!
The obvious thing to do is remove the NaNs from data
. Doing so, however, also requires that the corresponding positions in the 2D X
, Y
location arrays also be removed:
X, Y = np.indices(data.shape)
mask = ~np.isnan(data)
x = X[mask]
y = Y[mask]
data = data[mask]
Now you can use optimize.leastsq
(or the newer, simpler optimize.curve_fit
)
to fit the data to the model function:
p, success = optimize.leastsq(errorfunction, params, args=(x, y, data))
For example, if we generate some random data
with NaNs
data = make_data(shape)
so that
import matplotlib.pyplot as plt
plt.imshow(data)
plt.show()
looks like
with the white spots showing where there are NaN values, then
import numpy as np
from scipy import optimize
np.set_printoptions(precision=4)
def gaussian(p, x, y):
height, center_x, center_y, width_x, width_y = p
return height*np.exp(-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)
def moments(data):
total = np.nansum(data)
X, Y = np.indices(data.shape)
center_x = np.nansum(X*data)/total
center_y = np.nansum(Y*data)/total
row = data[int(center_x), :]
col = data[:, int(center_y)]
width_x = np.nansum(np.sqrt(abs((np.arange(col.size)-center_y)**2*col))
/np.nansum(col))
width_y = np.nansum(np.sqrt(abs((np.arange(row.size)-center_x)**2*row))
/np.nansum(row))
height = np.nanmax(data)
return height, center_x, center_y, width_x, width_y
def errorfunction(p, x, y, data):
return gaussian(p, x, y) - data
def fitgaussian(data):
params = moments(data)
X, Y = np.indices(data.shape)
mask = ~np.isnan(data)
x = X[mask]
y = Y[mask]
data = data[mask]
p, success = optimize.leastsq(errorfunction, params, args=(x, y, data))
return p
def make_data(shape):
h, w = shape
p = 50, h/2.0, w/2.0, h/3.0, w/5.0
print('Actual parameters: {}'.format(np.array(p)))
X, Y = np.indices(shape)
data = gaussian(p, X, Y) + np.random.random(shape)
mask = np.random.random(shape) < 0.3
data[mask] = np.nan
return data
shape = 100, 200
data = make_data(shape)
X, Y = np.indices(shape)
parameters = fitgaussian(data)
print('Fitted parameters: {}'.format(parameters))
fit = gaussian(parameters, X, Y)
yields
Actual parameters: [ 50. 50. 100. 33.3333 40. ]
Fitted parameters: [ 50.2908 49.9992 99.9927 33.7039 40.6149]
Just remove all the values that don't have a corresponding flux value. Removing pairs of values won't matter if you don't have anything on the y-axis at that point.
This should remove all values that don't have flux values if the empty values are equal to ''
# assumes data.shape = (1, 3) where data[:,0:1] is the x,y axis
# data[:,2] contains the flux values
data = numpy.delete(data, numpy.where(data[:,3] == ''), axis=0)
this will do the job if empty values are equal to nan
data = numpy.delete(data, numpy.where(data[:,3] == numpy.nan), axis=0)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With