V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
推荐学习书目
Learn Python the Hard Way
Python Sites
PyPI - Python Package Index
http://diveintopython.org/toc/index.html
Pocoo
值得关注的项目
PyPy
Celery
Jinja2
Read the Docs
gevent
pyenv
virtualenv
Stackless Python
Beautiful Soup
结巴中文分词
Green Unicorn
Sentry
Shovel
Pyflakes
pytest
Python 编程
pep8 Checker
Styles
PEP 8
Google Python Style Guide
Code Style from The Hitchhiker's Guide
ouying
V2EX  ›  Python

这个要怎么改呢? KFold 不可迭代 刚开始学这个 路过的大佬们教一下我吧 拜托了

  •  
  •   ouying · 2019-07-29 17:26:48 +08:00 · 2423 次点击
    这是一个创建于 1947 天前的主题,其中的信息可能已经有所发展或是发生改变。

    主要涉及代码: import pandas as pd import numpy as np from scipy.stats import skew import xgboost as xgb from sklearn.model_selection import KFold from sklearn.ensemble import ExtraTreesRegressor from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_squared_error from sklearn.linear_model import Ridge, RidgeCV, ElasticNet, LassoCV, Lasso from math import sqrt

    TARGET = 'SalePrice' NFOLDS = 5 SEED = 0 NROWS = None SUBMISSION_FILE = 'sample_submission.csv'

    Load the data

    train = pd.read_csv("E:/数据集 /data/home/aistudio/data/data9072/housingPrices_train.csv") test = pd.read_csv("E:/数据集 /data/home/aistudio/data/data9072/housingPrices_test.csv")

    ntrain = train.shape[0] ntest = test.shape[0]

    Preprocessing

    y_train = np.log(train[TARGET]+1)

    train.drop([TARGET], axis=1, inplace=True)

    all_data = pd.concat((train.loc[:,'MSSubClass':'SaleCondition'], test.loc[:,'MSSubClass':'SaleCondition']))

    #log transform skewed numeric features: numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index

    skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna())) #compute skewness skewed_feats = skewed_feats[skewed_feats > 0.75] skewed_feats = skewed_feats.index

    all_data[skewed_feats] = np.log1p(all_data[skewed_feats])

    all_data = pd.get_dummies(all_data)

    #filling NA's with the mean of the column: all_data = all_data.fillna(all_data.mean())

    #creating matrices for sklearn:

    x_train = np.array(all_data[:train.shape[0]]) x_test = np.array(all_data[train.shape[0]:])

    ##交叉采样## kf = KFold(ntrain, shuffle=True, random_state=SEED)

    class SklearnWrapper(object): def init(self, clf, seed=0, params=None): params['random_state'] = seed self.clf = clf(**params)

    def train(self, x_train, y_train):
        self.clf.fit(x_train, y_train)
    
    def predict(self, x):
        return self.clf.predict(x)
    

    class XgbWrapper(object): def init(self, seed=0, params=None): self.param = params self.param['seed'] = seed self.nrounds = params.pop('nrounds', 250)

    def train(self, x_train, y_train):
        dtrain = xgb.DMatrix(x_train, label=y_train)
        self.gbdt = xgb.train(self.param, dtrain, self.nrounds)
    
    def predict(self, x):
        return self.gbdt.predict(xgb.DMatrix(x))
    

    def get_oof(clf): oof_train = np.zeros((ntrain,)) oof_test = np.zeros((ntest,)) oof_test_skf = np.empty((NFOLDS, ntest))

    for i, train_index, test_index in enumerate(kf):
        x_tr = x_train[train_index]
        y_tr = y_train[train_index]
        x_te = x_train[test_index]
    
        clf.train(x_tr, y_tr)
    
        oof_train[test_index] = clf.predict(x_te)
        oof_test_skf[i, :] = clf.predict(x_test)
    
    oof_test[:] = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)
    

    et_params = { 'n_jobs': 16, 'n_estimators': 100, 'max_features': 0.5, 'max_depth': 12, 'min_samples_leaf': 2, }

    rf_params = { 'n_jobs': 16, 'n_estimators': 100, 'max_features': 0.2, 'max_depth': 12, 'min_samples_leaf': 2, }

    xgb_params = { 'seed': 0, 'colsample_bytree': 0.7, 'silent': 1, 'subsample': 0.7, 'learning_rate': 0.075, 'objective': 'reg:linear', 'max_depth': 4, 'num_parallel_tree': 1, 'min_child_weight': 1, 'eval_metric': 'rmse', 'nrounds': 500 }

    rd_params={ 'alpha': 10 }

    ls_params={ 'alpha': 0.005 }

    xg = XgbWrapper(seed=SEED, params=xgb_params) et = SklearnWrapper(clf=ExtraTreesRegressor, seed=SEED, params=et_params) rf = SklearnWrapper(clf=RandomForestRegressor, seed=SEED, params=rf_params) rd = SklearnWrapper(clf=Ridge, seed=SEED, params=rd_params) ls = SklearnWrapper(clf=Lasso, seed=SEED, params=ls_params)

    xg_oof_train, xg_oof_test = get_oof(xg) et_oof_train, et_oof_test = get_oof(et) rf_oof_train, rf_oof_test = get_oof(rf) rd_oof_train, rd_oof_test = get_oof(rd) ls_oof_train, ls_oof_test = get_oof(ls) 。。。。。

    报错如下: 153 ls = SklearnWrapper(clf=Lasso, seed=SEED, params=ls_params) 154 --> 155 xg_oof_train, xg_oof_test = get_oof(xg) 156 et_oof_train, et_oof_test = get_oof(et) 157 rf_oof_train, rf_oof_test = get_oof(rf)

    <ipython-input-22-f597c93e7ccc> in get_oof(clf) 91 oof_test_skf = np.empty((NFOLDS, ntest)) 92 ---> 93 for i, train_index, test_index in enumerate(kf): 94 x_tr = x_train[train_index] 95 y_tr = y_train[train_index]</ipython-input-22-f597c93e7ccc>

    TypeError: 'KFold' object is not iterable

    ouying
        1
    ouying  
    OP
       2019-07-29 17:28:00 +08:00
    我复制的时候不是这样子的 大家可以先看报错部分
    关于   ·   帮助文档   ·   博客   ·   API   ·   FAQ   ·   实用小工具   ·   4265 人在线   最高记录 6679   ·     Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 · 27ms · UTC 05:29 · PVG 13:29 · LAX 21:29 · JFK 00:29
    Developed with CodeLauncher
    ♥ Do have faith in what you're doing.