"ValueError: max_features must be in (0, n_features] " in scikit when using random forest

Tags:

I have a dataset with 20 features and 840 rows. I have already tuned the classifier (a random forest); my parameters are n_estimators=100 and max_features=5. I now want to run a separate classification for each feature, i.e. I want to know the prediction accuracy obtained from each feature on its own. But when I run my code I get an error. I am using scikit-learn version 0.18.

How can I fix the problem?

# Per-feature evaluation: for every column of x, cross-validate a
# RobustScaler + RandomForest pipeline on that single column and plot the
# mean accuracy as one bar per feature.
for name in ["AWA"]: 
    # Both the feature matrix ('x') and the labels ('y') are read from the same .mat file.
    x=sio.loadmat('/home/TrainVal/{}_Fp1.mat'.format(name))['x'] 
    s_y=sio.loadmat('/home/TrainVal/{}_Fp1.mat'.format(name))['y']
    # Flatten the loaded label array to 1-D.
    y=np.ravel(s_y)

    print(name, x.shape, y.shape) 
    print("")


    # Scale, then classify with a 100-tree forest that is asked to consider
    # 5 features at each split (max_features=5).
    clf = make_pipeline(preprocessing.RobustScaler(), RandomForestClassifier(n_estimators = 100, 
                                                                         max_features=5, n_jobs=-1)) 
    #########10x10 SSS##############
    print("10x10")

    # One pass per column of x.
    for i in range(x.shape[1]): 
        # Keep only column i, reshaped to a single-column 2-D array.
        # NOTE(review): clf is therefore fitted on 1 feature while
        # max_features=5; this looks like the cause of the
        # "max_features must be in (0, n_features]" error -- confirm.
        xA=x[:, i].reshape(-1,1)

        # Mean CV score of each of the 10 repetitions for this feature.
        xSSSmean = [] 
        for j in range(10):
            # 10 stratified shuffle splits with a 10% test set; the seed j
            # changes per repetition so each repetition uses different splits.
            sss = StratifiedShuffleSplit(n_splits=10, test_size=0.1, random_state=j) 
            scoresSSS = cross_val_score(clf, xA, y, cv=sss)
            xSSSmean.append(scoresSSS.mean()) 

        # result_list is not defined in this snippet; presumably created earlier -- verify.
        result_list.append(np.mean(xSSSmean))  
        # Bar height is the mean score over the 10 repetitions, times 100.
        plt.bar(i, np.mean(xSSSmean)*100, align = 'center')      
        plt.ylabel('Accuracy')
        plt.xlabel('Features')    
        plt.title('Accuracy per feature: {}_RF_Fp1(20)'.format(name)) 

        # Tick positions 0..i, i.e. one tick per feature plotted so far.
        xticks=np.arange(i+1)
        plt.xticks(xticks, rotation = 'vertical')
    plt.show()





#THE ERROR


ValueError                                Traceback (most recent call last)
<ipython-input-2-a5faae7f83a2> in <module>()
     24 
     25             sss = StratifiedShuffleSplit(n_splits=10, test_size=0.1, random_state=j)#ver18
---> 26             scoresSSS = cross_val_score(clf, xA, y, cv=sss)
     27             xSSSmean.append(scoresSSS.mean()) 
     28             #print(scoresSSS)

 /home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/model_selection/_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)
    138                                               train, test, verbose, None,
    139                                               fit_params)
--> 140                       for train, test in cv_iter)
    141     return np.array(scores)[:, 0]
    142 

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    756             # was dispatched. In particular this covers the edge
    757             # case of Parallel used with an exhausted iterator.
--> 758             while self.dispatch_one_batch(iterator):
    759                 self._iterating = True
    760             else:

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    606                 return False
    607             else:
--> 608                 self._dispatch(tasks)
    609                 return True
    610 

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    569         dispatch_timestamp = time.time()
    570         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 571         job = self._backend.apply_async(batch, callback=cb)
    572         self._jobs.append(job)
    573 

 /home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    107     def apply_async(self, func, callback=None):
    108         """Schedule a func to be run"""
--> 109         result = ImmediateResult(func)
    110         if callback:
    111             callback(result)

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    324         # Don't delay the application, to avoid keeping the input
    325         # arguments in memory
--> 326         self.results = batch()
    327 
    328     def get(self):

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

 /home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    236             estimator.fit(X_train, **fit_params)
    237         else:
--> 238             estimator.fit(X_train, y_train, **fit_params)
    239 
    240     except Exception as e:

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
    268         Xt, fit_params = self._fit(X, y, **fit_params)
    269         if self._final_estimator is not None:
--> 270             self._final_estimator.fit(Xt, y, **fit_params)
    271         return self
    272 

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/ensemble/forest.py in fit(self, X, y, sample_weight)
    324                     t, self, X, y, sample_weight, i, len(trees),
    325                     verbose=self.verbose, class_weight=self.class_weight)
--> 326                 for i, t in enumerate(trees))
    327 
    328             # Collect newly grown trees

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    756             # was dispatched. In particular this covers the edge
    757             # case of Parallel used with an exhausted iterator.
--> 758             while self.dispatch_one_batch(iterator):
    759                 self._iterating = True
    760             else:

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    606                 return False
    607             else:
--> 608                 self._dispatch(tasks)
    609                 return True
    610 

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    569         dispatch_timestamp = time.time()
    570         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 571         job = self._backend.apply_async(batch, callback=cb)
    572         self._jobs.append(job)
    573 

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    107     def apply_async(self, func, callback=None):
    108         """Schedule a func to be run"""
--> 109         result = ImmediateResult(func)
    110         if callback:
    111             callback(result)

 /home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    324         # Don't delay the application, to avoid keeping the input
    325         # arguments in memory
--> 326         self.results = batch()
    327 
    328     def get(self):

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

 /home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132 
    133     def __len__(self):

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/ensemble/forest.py in _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight)
    118             curr_sample_weight *= compute_sample_weight('balanced', y, indices)
    119 
--> 120         tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
    121     else:
    122         tree.fit(X, y, sample_weight=sample_weight, check_input=False)

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/tree/tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
    737             sample_weight=sample_weight,
    738             check_input=check_input,
--> 739             X_idx_sorted=X_idx_sorted)
    740         return self
    741 

/home/karim/anaconda2/envs/scikit18/lib/python3.5/site-packages/sklearn/tree/tree.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
    244             raise ValueError("max_depth must be greater than zero. ")
    245         if not (0 < max_features <= self.n_features_):
--> 246             raise ValueError("max_features must be in (0, n_features]")
    247         if not isinstance(max_leaf_nodes, (numbers.Integral, np.integer)):
    248             raise ValueError("max_leaf_nodes must be integral number but was "

ValueError: max_features must be in (0, n_features]

319

asked Feb 06 '17 16:02

Aizzaac

1 Answer

So I managed to solve the problem! :) The scikit-learn documentation says:

*If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split.*

My values:

n_features=20. This is an int: it is the number of features that I have in my dataset.

max_features: this is the number of features that I want to use. I had given it as an int, so I have to turn it into a float.

To turn it into float I have to use the formula that is in scikit:

int(max_features * n_features)
int(x * 20)=2
x=0.1

Here I assume that I want to use only 2 of the 20 features.

x is the fraction of features, expressed as a float.

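I changed the value of max_features from an int to a float, as shown below. (Note that in the question's loop each model is fitted on a single column, xA = x[:, i].reshape(-1,1), so the forest actually sees n_features = 1; that is why the int value 5 failed the "0 < max_features <= n_features" check, whereas a float is interpreted relative to the number of columns actually passed in.)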

max_features:

(int) (float)

20 = 1.0

15 = 0.75

10 = 0.5

5 = 0.25

2 = 0.1

EXAMPLE

# Instead of passing max_features as an int (an absolute number of features):
clf = make_pipeline(preprocessing.RobustScaler(), RandomForestClassifier(n_estimators = 100, 
                   max_features=5, n_jobs=-1)) 

# I passed max_features as a float (a fraction of the available features):
clf = make_pipeline(preprocessing.RobustScaler(), RandomForestClassifier(n_estimators = 100, 
                   max_features=0.25, n_jobs=-1))

161

answered Oct 03 '22 20:10

Aizzaac

Related questions
                            
                                PyInstaller; "could not find or load the Qt platform plugin "windows"
                            
                                how to convert bytes to string in Python 3 [duplicate]
                            
                                Pandas select unique values from column
                            
                                Configure Python Flask App to use "create_app" factory and use database in model class
                            
                                PyTest - Apply mock to all tests
                            
                                Exception similar to ModuleNotFoundError in Python 2.7?
                            
                                Variable X not updating when variables that should effect X change
                            
                                Disabling part of the nlp pipeline
                            
                                python sklearn get list of available hyper parameters for model
                            
                                The smtplib.server.sendmail function in python raises UnicodeEncodeError: 'ascii' codec can't encode character
                            
                                Show text inside the tags BeautifulSoup
                            
                                How to convert a QByteArray to a python string in PySide2 [duplicate]
                            
                                Tox 0% coverage
                            
                                How to connect kafka topic with web endpoint using Faust Python package?
                            
                                Ubuntu Tkinter installation does not include PyImagingPhoto
                            
                                How to remove ns0 tag while dumping
                            
                                function to write on stderr with python2 and python3
                            
                                No module named 'win32api'
                            
                                how to create a .condarc file for Anaconda?
                            
                                inserting numpy integer types into sqlite with python3

Donate For Us

If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!

Donate Us With

"ValueError: max_features must be in (0, n_features] " in scikit when using random forest

Tags:

optimization

python-3.x

machine-learning

scikit-learn

random-forest

Aizzaac

People also ask

1 Answers

Aizzaac

Recent Activity

Donate For Us