I have the following functions defined. For some reason, stack_data()
always returns an empty array and I cannot figure out why. Does anyone have any suggestions?
General suggestions on improving coding style, form, readability, etc. would be very helpful. General debugging tips would be great too.
Example of what should be happening:
input:
print(stack_data(np.array([[1,1,1,2,2,2,3,3,3],[4,4,4,5,5,5,6,6,6],[7,7,7,8,8,8,9,9,9]]), 0.33))
output: [4,1,4,2,2,3,4,4,4.5,7,7,7.5,9,9]
def _fullsweep_ranges(spec_data):
start = [x for x in range(0,len(spec_data[:,1])) \
if spec_data[x,1] == spec_data[:,1].min()]
stop = [x for x in range(0,len(spec_data[:,1])) \
if spec_data[x,1] == spec_data[:,1].max()]
return zip(start,stop)
def _remove_partial_fullsweeps(spec_data):
    """Trim leading and trailing rows that belong to incomplete fullsweeps.

    Keeps the rows from the first sweep-start index through the last
    sweep-stop index (inclusive), as reported by ``_fullsweep_ranges``.

    Parameters
    ----------
    spec_data : 2-D numpy array

    Returns
    -------
    numpy array
        The row slice ``spec_data[first_start:last_stop + 1, :]``.  The input
        is returned unchanged when no (start, stop) pair is found.
    """
    # Materialise the pairs: on Python 3 a bare zip object cannot be indexed.
    ranges = list(_fullsweep_ranges(spec_data))
    if not ranges:
        return spec_data
    first_min_index = ranges[0][0]
    last_max_index = ranges[-1][1]
    return spec_data[first_min_index:last_max_index + 1, :]
def _flatten_data(spec_data):
row = 0
flat_data = []
running = False
while (row < np.shape(spec_data)[0] - 1):
if not(running):
start = row
running = True
if spec_data[row,1] != spec_data[row+1,1]:
stop = row
running = False
time = np.mean(spec_data[start:stop,0], axis=0)
start_freq = spec_data[start,1]
freq_step = np.mean(spec_data[start:stop,2], axis=0)
bin_size = spec_data[0,3] * (stop - start)
avg_subspectra = np.mean(spec_data[start:stop,4:], axis=0)
data_row = [time, start_freq, freq_step, bin_size, avg_subspectra]
flat_data.append(data_row)
row += 1
return np.array(flat_data)
def _split_row(row, num_overlap):
return row[:num_overlap], row[num_overlap:-num_overlap], row[-num_overlap:]
def stack_data(spec_data, percent_overlap):
    """Stack the subspectra of each fullsweep, averaging overlapping regions.

    Parameters
    ----------
    spec_data : 2-D numpy array
        Spectrum data; columns 0-3 are read as time, start frequency,
        freq_step and bin size, and columns 4 onward as the subspectrum.
    percent_overlap : float
        Fraction (e.g. 0.33) of each subspectrum that overlaps its neighbour.

    Returns
    -------
    numpy array
        One row per fullsweep.  The first column is the center time of the
        fullsweep, the second is its start frequency (this should be the same
        for each row), the third is freq_step, and the rest is the stacked
        spectrum with each overlapping region replaced by the element-wise
        mean of the two subspectra that share it.  An empty array is returned
        when there is nothing to stack.
    """
    spec_data = _remove_partial_fullsweeps(spec_data)
    spec_data = _flatten_data(spec_data)
    if len(spec_data) == 0:
        return np.array([])
    ranges = _fullsweep_ranges(spec_data)
    # int(): math.ceil returns a float on Python 2, which cannot be used as a
    # slice bound.
    num_overlap = int(math.ceil(len(spec_data[0, 4:]) * percent_overlap))
    output = []
    for start, stop in ranges:
        sweep = spec_data[start:stop + 1]
        center_time = np.mean(sweep[:, 0], axis=0)
        start_freq = sweep[0, 1]
        freq_step = np.mean(sweep[:, 2], axis=0)
        output_row = [center_time, start_freq, freq_step]
        # Split only the subspectrum columns; columns 0-3 are metadata and
        # num_overlap was computed from the subspectrum length.
        split_data = [_split_row(row[4:], num_overlap) for row in sweep]
        last = len(split_data) - 1
        # enumerate yields (index, item), so the 3-tuple needs its own
        # parentheses to unpack.
        for i, (beg, mid, end) in enumerate(split_data):
            if i == 0:
                output_row.extend(beg)
            output_row.extend(mid)
            if i == last:
                output_row.extend(end)
            else:
                # Average this subspectrum's tail with the next one's head.
                next_beg = split_data[i + 1][0]
                output_row.extend(np.mean([end, next_beg], axis=0))
        output.append(output_row)
    return np.array(output)
The error comes from _flatten_data()
in the return-line:
return np.array(flat_data)
because flat_data
in the example that you posted is:
[[nan, 1, nan, 0, array([ nan, nan, nan, nan, nan])], [nan, 4, nan, 0, array([ nan, nan, nan, nan, nan])]]
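which is not a valid representation of a multidimensional array: each row mixes scalars with a nested array, so NumPy cannot build a rectangular 2-D array from it. The nan values (and the bin size of 0) appear because start equals stop for single-row groups, so the slices spec_data[start:stop] are empty and np.mean of an empty slice is nan.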
If you like our work, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With