I have turned a DataFrame that has tuples of length 2 as its index
1 2 -1
(0, 1) 0 1 0
(0, 2) 1 0 0
(0, -1) 0 0 0
(1, 1) 1 0 0
(1, 2) 0 1 0
(1, -1) 1 1 1
into a 2D NumPy array and managed to split it into a 3D array (with regard to the first index value) using the split function:
# Split the 2D array into 2 chunks along its first axis, then stack the chunks into one 3D array.
arr = np.array(np.array_split(arr,2))
with result
[[[0 1 0]
[1 0 0]
[0 0 0]]
[[1 0 0]
[0 1 0]
[1 1 1]]]
I want to write a function that takes the split even further, for example, to create a 5D tensor from indices of length 4 such as (0,0,0,0).
Any idea on how to do this recursively?
Use the following code to generate sample data:
import pandas as pd
import numpy as np
import itertools
def create_fake_data_frame(nlevels=2, ncols=3):
    """Build a sample DataFrame indexed by every 0/1 combination.

    Args:
        nlevels: Number of levels in the resulting ``MultiIndex``; the frame
            has ``2 ** nlevels`` rows.
        ncols: Number of value columns.

    Returns:
        A DataFrame whose cells hold ``0 .. ncols * 2**nlevels - 1`` in
        row-major order, indexed by the cartesian product of ``[0, 1]``
        taken ``nlevels`` times.
    """
    # from_product builds the MultiIndex directly, so there is no need to hand
    # a raw iterator of tuples to the constructor and convert it afterwards.
    index = pd.MultiIndex.from_product(nlevels * [[0, 1]])
    nrows = 2 ** nlevels
    data = np.arange(ncols * nrows).reshape(nrows, ncols)
    return pd.DataFrame(data=data, index=index)
def convert_index_of_tuples_to_multiindex(df):
    """Return *df* re-indexed by a MultiIndex built from its tuple index entries."""
    multi_index = pd.MultiIndex.from_tuples(df.index)
    reindexed = df.set_index(multi_index)
    return reindexed
# Build the sample frame with a 3-level MultiIndex and show it.
# Increase nlevels to get dataframes with more levels in their MultiIndex
df = create_fake_data_frame(nlevels=3)
print(df)
This is the result:
0 1 2
0 0 0 0 1 2
1 3 4 5
1 0 6 7 8
1 9 10 11
1 0 0 12 13 14
1 15 16 17
1 0 18 19 20
1 21 22 23
Then, modify the dataframe in such a way that each row contains a single column, whose value is a list of the values in the corresponding row of the original dataframe:
def data_frame_with_single_column_of_lists(df):
    """Collapse all value columns into one list-valued ``'lists'`` column.

    Args:
        df: DataFrame whose rows should each become a single list.

    Returns:
        ``df`` itself when it has at most one column (so an already-collapsed
        frame passes through unchanged); otherwise a new DataFrame with the
        same index and a single ``'lists'`` column whose cells are the
        original rows as Python lists.
    """
    if len(df.columns) <= 1:
        return df
    # One bulk conversion instead of a row-wise ``apply`` that copies and
    # enlarges a Series for every row.
    rows = df.values.tolist()
    return pd.Series(rows, index=df.index, name='lists').to_frame()
def collapse_columns_into_lists(s):
    """Return a one-entry Series labelled 'lists' that holds all values of *s* as a list."""
    row_values = s.values.tolist()
    collapsed = s.copy()
    # Append the whole row, as one Python list, under a new 'lists' label ...
    collapsed['lists'] = row_values
    # ... and keep nothing but that label.
    return collapsed[['lists']]
# Replace the value columns with a single 'lists' column holding each row as a list.
df = data_frame_with_single_column_of_lists(df)
print(df)
The output will be like this:
lists
0 0 0 [0, 1, 2]
1 [3, 4, 5]
1 0 [6, 7, 8]
1 [9, 10, 11]
1 0 0 [12, 13, 14]
1 [15, 16, 17]
1 0 [18, 19, 20]
1 [21, 22, 23]
Finally, use the following code to get a tensor
def increase_list_nesting_by_removing_an_index_level(df):
    """Drop the innermost index level, nesting the merged cells into lists.

    Rows that share every index level except the last are merged into one
    row; each of its cells is the list of the merged cells, in their original
    order.

    Args:
        df: DataFrame with more than one index level.

    Returns:
        A DataFrame indexed by the remaining levels, with the same columns.
    """
    def list_of_lists(series):
        # The cells of one group, in order, become a single list.  Reading
        # them straight off the series works for any column name, not just
        # 'lists'.
        return series.tolist()

    grouped = df.groupby(df.index.droplevel(-1))
    result = grouped.agg(list_of_lists)
    # Grouping on several remaining levels can leave a flat index of tuples;
    # restore the MultiIndex so that further levels can be dropped later.
    # The length guard avoids an IndexError on an empty result.
    if len(result.index) > 0 and isinstance(result.index[0], tuple):
        result = convert_index_of_tuples_to_multiindex(result)
    return result
def tensor_from_data_frame(df):
    """Turn an indexed frame into a nested NumPy array.

    Index levels are removed one at a time, innermost first, until a single
    level is left; the first cell of every remaining row then forms the
    outermost axis of the returned array.
    """
    frame = df
    while frame.index.nlevels > 1:
        frame = increase_list_nesting_by_removing_an_index_level(frame)
    first_cells = [row[0] for row in frame.values]
    return np.array(first_cells)
# Fold the index levels away one at a time until only the nested array remains.
tensor = tensor_from_data_frame(df)
print(tensor)
The result will be like this:
[[[[ 0 1 2]
[ 3 4 5]]
[[ 6 7 8]
[ 9 10 11]]]
[[[12 13 14]
[15 16 17]]
[[18 19 20]
[21 22 23]]]]
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With