TypeError: Singleton array array(<__main__.AZHU_EmailClassifier_2 object at 0x000001D6E7A680D0>, dtype=object) cannot be considered a valid collection.
I get this error when I try to run the train_test_split function within my custom AZHU_EmailClassifier_2 class.
My class:
class AZHU_EmailClassifier_2:
    """Class from the question; ``retrain_model`` loads a spreadsheet and splits it."""

    # These imports execute inside the class body, so ``os``, ``np``, ``pd`` and
    # ``train_test_split`` are bound as class attributes (hence the ``self.``
    # prefixes below) instead of module-level names.
    import os
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split

    def __init__(self):
        pass

    def retrain_model(self, csv_file):
        """Read ``csv_file``, drop incomplete rows and split columns 0 and 1.

        Args:
            csv_file: File name handed to ``read_excel``; it is resolved
                relative to the directory this method switches into.

        Returns:
            ``None`` when fewer than ``MIN_ROW_NUMBER`` rows remain after
            dropping rows with missing values, otherwise ``X_train``.
        """
        MIN_ROW_NUMBER = 500
        # TEST_SIZE and RANDOM_STATE are only referenced by the commented-out
        # call near the end of this method.
        TEST_SIZE = 0.25
        RANDOM_STATE = 42
        self.os.chdir(r"c:\LORI\PROJECTS\ALLIANZ\INCOMING_CHANNELS") # <---- folder of the retraining file
        df=self.pd.read_excel(csv_file,error_bad_lines=False, header=None)
        # Remove every row that contains at least one missing value.
        df.dropna(axis=0,how='any', inplace=True)
        rows_no=df.shape[0]
        # NOTE(review): the message below mentions 35.000 rows, but the
        # threshold actually checked is MIN_ROW_NUMBER (500).
        if rows_no<MIN_ROW_NUMBER:
            print("Insufficient number of rows (<35.000)! RETRAINING ABORTED")
            return None
        # header=None gives integer column labels: 0 is used as the features,
        # 1 as the target.
        X=df[0]
        y=df[1]
        # Called through ``self``, so the instance is passed implicitly as the
        # first positional argument; this is the line the traceback points at.
        X_train, X_test, y_train, y_test=self.train_test_split(X,y)
        #X_train, X_test, y_train, y_test=self.train_test_split(X,y,test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y)
        return X_train
The error is triggered when I run the train_test_split function.
The whole error message:
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) in 1 instance = AZHU_EmailClassifier_2() 2 ----> 3 instance.retrain_model("retraining_dummy.xlsx")
in retrain_model(self, csv_file) 28 y=df[1] 29 ---> 30 X_train, X_test, y_train, y_test=self.train_test_split(X,y) 31 #X_train, X_test, y_train, y_test=self.train_test_split(X,y,test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y) 32
~\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in train_test_split(*arrays, **options) 2125 raise TypeError("Invalid parameters passed: %s" % str(options)) 2126 -> 2127 arrays = indexable(*arrays) 2128 2129 n_samples = _num_samples(arrays[0])
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in indexable(*iterables) 291 """ 292 result = [_make_indexable(X) for X in iterables] --> 293 check_consistent_length(*result) 294 return result 295
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays) 251 """ 252 --> 253 lengths = [_num_samples(X) for X in arrays if X is not None] 254 uniques = np.unique(lengths) 255 if len(uniques) > 1:
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in <listcomp>(.0) 251 """ 252 --> 253 lengths = [_num_samples(X) for X in arrays if X is not None] 254 uniques = np.unique(lengths) 255 if len(uniques) > 1:
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in _num_samples(x) 194 if hasattr(x, 'shape') and x.shape is not None: 195 if len(x.shape) == 0: --> 196 raise TypeError("Singleton array %r cannot be considered" 197 " a valid collection." % x) 198 # Check that shape is returning an integer or default to len
TypeError: Singleton array array(<__main__.AZHU_EmailClassifier_2 object at 0x000001D6E7A68F10>, dtype=object) cannot be considered a valid collection.
I have no clue why it throws this error. Could you please point me in the right direction? Any help is appreciated!
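You get this error because you import `train_test_split` inside the class body. The import makes `train_test_split` a class attribute, so accessing it as `self.train_test_split` yields a bound method rather than a plain function, and the instance is passed implicitly as the first argument whenever it is called. scikit-learn then tries to treat the instance as the first array to split, which produces the "Singleton array" error. Here is a minimal example that reproduces the situation: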
class test():
    """Minimal reproduction: ``train_test_split`` is imported in the class body."""

    # Importing here binds the function as a class attribute.
    from sklearn.model_selection import train_test_split

    def retrain_model(self):
        # Reached through ``self``, the attribute is printed as a bound method.
        print(self.train_test_split)
        # No explicit arguments are given, yet the instance is still passed
        # implicitly as the first one, which raises the TypeError quoted below.
        print(self.train_test_split())
# Create an instance and run the method that contains the failing call.
test_instance = test()
test_instance.retrain_model()
After you run this script, you will get a TypeError:
TypeError: Singleton array array(<__main__.test object at 0x7ffa473ae438>, dtype=object) cannot be considered a valid collection.
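The first `print` shows that `self.train_test_split` is a bound method of the `test` object located at the same address, 0x7ffa473ae438, which confirms that the object named in the error message is the instance itself.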
According to PEP 8:
Imports are always put at the top of the file, just after any module comments and docstrings, and before module globals and constants.
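Therefore, the simplest solution is to move all imports to the top of the file, outside the class, and call `train_test_split` directly (without `self.`), so that it is an ordinary module-level function and no instance is passed to it: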
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
class AZHU_EmailClassifier_2:
    """Corrected version of the class: it relies on module-level imports.

    ``train_test_split``, ``np`` and ``pd`` are looked up as module globals,
    so no instance is passed implicitly to ``train_test_split``.
    """

    def __init__(self):
        pass

    def retrain_model(self):
        """Split a synthetic 100-row data set and return the training features.

        The data frame is generated in place of the spreadsheet from the
        question: column 0 holds 100 evenly spaced values from 1 to 100 and
        column 1 holds uniform random numbers.

        Returns:
            The training part of column 0 produced by ``train_test_split``
            with ``test_size=0.25`` and ``random_state=42``.
        """
        TEST_SIZE = 0.25
        RANDOM_STATE = 42

        df = pd.DataFrame({0: np.linspace(1, 100, 100), 1: np.random.rand(100)})
        X = df[0]
        y = df[1]

        # Plain function call: only X and y are passed as the arrays to split.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
        )
        return X_train
# Usage example for the corrected class: build an instance and run the split.
test = AZHU_EmailClassifier_2()
test.retrain_model()
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With