Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Hyperopt mongotrials issue with Pickle: AttributeError: 'module' object has no attribute

I'm trying to use Hyperopt parallel search with MongoDB, and encountered some issues with MongoTrials, which have been discussed here. I've tried all their methods, and I am still unable to find a solution to my specific problem. The specific model I'm trying to minimize is RandomForestRegressor from sklearn.

I've followed this tutorial. And I'm able to print out the calculated "fmin" with no issue.

Here are my steps so far:

1) Activate a virtual environment called "tensorflow" (I've installed all my libraries there)

2) Start MongoDB:

(tensorflow) bash-3.2$ mongod --dbpath . --port 1234 --directoryperdb --journal --nohttpinterface

3) Initiate workers:

(tensorflow) bash-3.2$ hyperopt-mongo-worker --mongo=localhost:1234/foo_db --poll-interval=0.1

4) Run my python code, and my python code is as follows:

import numpy as np
import pandas as pd

from sklearn.metrics import mean_absolute_error

from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from hyperopt.mongoexp import MongoTrials


# Preprocessing data: load the training set into a DataFrame
train_xg = pd.read_csv('train.csv')
n_train = len(train_xg)
print "Whole data set size: ", n_train

# Creating columns for features, and categorical features.
# 'id', 'loss' and 'log_loss' are excluded from both lists; the categorical
# list is restricted to object-dtype columns.
features_col = [x for x in train_xg.columns if x not in ['id', 'loss', 'log_loss']]
cat_features_col = [x for x in train_xg.select_dtypes(include=['object']).columns if x not in ['id', 'loss', 'log_loss']]
# Overwrite each categorical column with its integer category codes
for c in range(len(cat_features_col)):
    train_xg[cat_features_col[c]] = train_xg[cat_features_col[c]].astype('category').cat.codes

# Use this to train random forest regressor
train_xg_x = np.array(train_xg[features_col])
train_xg_y = np.array(train_xg['loss'])


# Search space: min_samples_leaf is picked from the integers 1..99
space_rf = { 'min_samples_leaf': hp.choice('min_samples_leaf', range(1,100)) }

# Trials live in the MongoDB started in step 2 (database foo_db, collection jobs)
trials = MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp1')

def minMe(params):
    """Objective function passed to hyperopt's fmin.

    Builds a RandomForestRegressor from ``params``, cross-validates it on
    the module-level arrays ``train_xg_x`` / ``train_xg_y`` and reports the
    negated mean score as the loss.

    params: dict of keyword arguments for RandomForestRegressor.
    Returns: dict with the keys 'loss' and 'status'.
    """
    # Hyperopt tuning for hyperparameters
    # Imports are done inside the function as an attempted pickling workaround.
    from sklearn.model_selection import cross_val_score
    from sklearn.ensemble import RandomForestRegressor
    from hyperopt import STATUS_OK

    # NOTE: `pickle` (dill or stdlib) is imported here but never used below.
    try:
        import dill as pickle
        print('Went with dill')
    except ImportError:
        import pickle

    def hyperopt_rf(params):
        # Mean cross-validation score for a forest built with these params
        rf = RandomForestRegressor(**params)
        return cross_val_score(rf, train_xg_x, train_xg_y).mean()

    acc = hyperopt_rf(params)
    print 'new acc:', acc, 'params: ', params
    # fmin minimizes, so the score is negated to turn it into a loss
    return {'loss': -acc, 'status': STATUS_OK}

# Run the TPE search for at most 100 evaluations, recording results in `trials`.
# NOTE(review): with MongoTrials the objective appears to be unpickled by
# hyperopt-mongo-worker (see the traceback below), so the worker must be able
# to resolve `minMe` by module name.
best = fmin(fn=minMe, space=space_rf, trials=trials, algo=tpe.suggest, max_evals=100)
print "Best: ", best

5) After I run the above Python code, I get the following errors:

INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
  File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
    sys.exit(hyperopt.mongoexp.main_worker())
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
    return main_worker_helper(options, args)
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
    mworker.run_one(reserve_timeout=float(options.reserve_timeout))
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
    domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
  File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
    sys.exit(hyperopt.mongoexp.main_worker())
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
    return main_worker_helper(options, args)
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
    mworker.run_one(reserve_timeout=float(options.reserve_timeout))
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
    domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
  File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
    sys.exit(hyperopt.mongoexp.main_worker())
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
    return main_worker_helper(options, args)
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
    mworker.run_one(reserve_timeout=float(options.reserve_timeout))
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
    domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:no job found, sleeping for 0.7s
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
  File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
    sys.exit(hyperopt.mongoexp.main_worker())
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
    return main_worker_helper(options, args)
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
    mworker.run_one(reserve_timeout=float(options.reserve_timeout))
  File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
    domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:exiting with N=9223372036854775803 after 4 consecutive exceptions

6) Then Mongo workers would shut off.

Things I've tried:

  • install "dill" as the error suggested -> didn't work
  • Put global imports into the objective function so it can pickle -> didn't work
  • Put try except with "dill" or "pickle" as import -> didn't work

Does anyone have similar issues? I'm running out of ideas to try, and have been working on this for 2 days in vain. I think I am missing something really simple here, just can't seem to find it. What am I missing? Any suggestion is welcomed please!

like image 399
wernerchao Avatar asked Dec 29 '16 05:12

wernerchao


2 Answers

Had the same problem in Python 3.5. Installing dill didn't help, nor did setting workdir in MongoTrials or in the hyperopt-mongo-worker CLI. hyperopt-mongo-worker doesn't seem to have access to the __main__ module where the function was defined:

AttributeError: Can't get attribute 'minMe' on <module '__main__' from ...hyperopt-mongo-worker

As @jaikumarm suggested, I circumvented the problem by writing a module file with all the required functions. However, instead of soft-linking it into the bin directory, I extended the PYTHONPATH before running hyperopt-mongo-worker:

export PYTHONPATH="${PYTHONPATH}:<dir_with_the_module.py>"
hyperopt-mongo-worker ...

That way, the hyperopt-mongo-worker is able to import the module containing minMe.

like image 144
Avsecz Avatar answered Nov 12 '22 06:11

Avsecz


I fought with this for several days before coming up with a workable solution. There are two problems: 1. The mongo worker spawns off a separate process to run the optimizer, so any context from your original Python file is lost and unavailable to this new process. 2. The imports in this new process happen in the context of the hyperopt-mongo-worker script, which in your case will be in /Users/WernerChao/tensorflow/bin/.

So my solution is to make this new optimizer function completely self-sufficient:

optimizer.py

import numpy as np
import pandas as pd

from sklearn.metrics import mean_absolute_error

# Preprocessing data
train_xg = pd.read_csv('train.csv')
n_train = len(train_xg)
print "Whole data set size: ", n_train

# Creating columns for features, and categorical features
features_col = [x for x in train_xg.columns if x not in ['id', 'loss', 'log_loss']]
cat_features_col = [x for x in train_xg.select_dtypes(include=['object']).columns if x not in ['id', 'loss', 'log_loss']]
for c in range(len(cat_features_col)):
    train_xg[cat_features_col[c]] = train_xg[cat_features_col[c]].astype('category').cat.codes

# Use this to train random forest regressor
train_xg_x = np.array(train_xg[features_col])
train_xg_y = np.array(train_xg['loss'])



def minMe(params):
    # Hyperopt tuning for hyperparameters
    from sklearn.model_selection import cross_val_score
    from sklearn.ensemble import RandomForestRegressor
    from hyperopt import STATUS_OK

    try:
        import dill as pickle
        print('Went with dill')
    except ImportError:
        import pickle

    def hyperopt_rf(params):
        rf = RandomForestRegressor(**params)
        return cross_val_score(rf, train_xg_x, train_xg_y).mean()

    acc = hyperopt_rf(params)
    print 'new acc:', acc, 'params: ', params
    return {'loss': -acc, 'status': STATUS_OK}

wrapper.py

from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from hyperopt.mongoexp import MongoTrials

import optimizer

space_rf = { 'min_samples_leaf': hp.choice('min_samples_leaf', range(1,100)) }
best = fmin(fn=optimizer.minMe, space=space_rf, trials=trials, algo=tpe.suggest, max_evals=100)
print "Best: ", best

trials = MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp1')

Once you have this code, link optimizer.py into the bin folder:

ln -s /Users/WernerChao/Git/test/optimizer.py /Users/WernerChao/tensorflow/bin/

Now run wrapper.py and then the mongo worker; it should be able to import the optimizer module from its local context and run the minMe function.

like image 32
jaikumarm Avatar answered Nov 12 '22 06:11

jaikumarm