XGBoost: объект «DataFrame» не имеет атрибута «num_row»

Я работаю над задачей многоклассовой классификации с помощью xgboost. Размерность моих данных:

# Print the shapes of the train and test data; the observed output is shown below.
print(train_ohe.shape, test_ohe.shape)
# (43266, 190) (18543, 190)

Пользовательская метрика F1 и код обучения модели:

def f1_eval(y_pred, dtrain):
    """Return the weighted-F1 error as a ``(name, value)`` pair.

    Args:
        y_pred: Model predictions; they are rounded with ``np.round``
            before being scored.
        dtrain: Object exposing ``get_label()``, which supplies the
            true labels.

    Returns:
        The tuple ``('f1_err', 1 - weighted_f1)``, so lower is better.
    """
    labels = dtrain.get_label()
    rounded_pred = np.round(y_pred)
    weighted_f1 = f1_score(labels, rounded_pred, average='weighted')
    return 'f1_err', 1 - weighted_f1

def train_model(algo,train,test,predictors,useTrainCV=True,
                cv_folds=5,early_stopping_rounds=50):
    """Optionally tune ``n_estimators`` via ``xgb.cv``, then fit and report F1.

    Args:
        algo: Estimator exposing ``get_params``, ``set_params``, ``fit``,
            ``predict`` and ``predict_proba``.
        train: Training data, indexed by ``predictors`` and by the
            module-level name ``target``.
        test: Test data, indexed the same way as ``train``.
        predictors: Column labels used as features.
        useTrainCV: When true, run cross-validation first and set
            ``n_estimators`` to the number of rows in the CV result.
        cv_folds: Value passed to ``xgb.cv`` as ``nfold``.
        early_stopping_rounds: Value passed to ``xgb.cv`` unchanged.

    Returns:
        None. The scores are printed rather than returned.
    """
    if useTrainCV:
        xgb_param = algo.get_params()
        # Wrap the raw feature/label arrays in DMatrix objects.
        xgb_train = xgb.DMatrix(train[predictors].values,label=train[target].values)
        # NOTE(review): xgb_test is only used for the row-count print below.
        xgb_test = xgb.DMatrix(test[predictors].values)
        print(xgb_train.num_row())
        print(xgb_test.num_row())
        # NOTE(review): the second argument is `train` (the data frame passed
        # in), not `xgb_train`. xgb.cv calls `num_row()` on this argument,
        # which is what raises "'DataFrame' object has no attribute 'num_row'".
        # NOTE(review): `metrics` receives the string 'f1_eval', not the
        # f1_eval function; presumably the custom metric was meant to go
        # through the `feval` parameter of xgb.cv -- confirm against its docs.
        cv_result = xgb.cv(xgb_param,
                           train,
                           num_boost_round=xgb_param['n_estimators'],
                           nfold=cv_folds,
                           metrics='f1_eval',
                          early_stopping_rounds=early_stopping_rounds)
        # One row of cv_result per boosting round that was kept.
        algo.set_params(n_estimators=cv_result.shape[0])

    # Fit algorithm on data
    algo.fit(train[predictors],train[target],eval_metric=f1_eval)

    # Predict train data
    train_predictions = algo.predict(train[predictors])
    # NOTE(review): train_pred_prob is assigned but never used in this function.
    train_pred_prob = algo.predict_proba(train[predictors])[:,1]

    # Report model performance
    # NOTE(review): f1_score is called without `average` here, while f1_eval
    # uses average='weighted'; this may not work for multi-class targets --
    # confirm against the f1_score documentation.
    print("Model performance")
    print("F1 Score Train {}".format(f1_score(train[target].values,train_predictions)))

    # Predict test data
    test_predictions = algo.predict(test[predictors])

    # Report test performance (requires `test` to contain the target column)
    print("F1 Score Test {}".format(f1_score(test[target].values,test_predictions)))

Вот мой код с XGBClassifier. Я пытаюсь подобрать число деревьев (n_estimators) при высоком значении скорости обучения (learning rate).

# Name of the label column.
target = 'Complaint-Status'

# Every column except the label is a feature. Compare with `!=`: on a string,
# `x not in target` is a substring test, so it would also drop any column whose
# name happens to be contained in 'Complaint-Status' (and would raise TypeError
# for non-string column labels).
predictors = [x for x in train_ohe.columns if x != target]

# Baseline classifier with a relatively high learning rate; n_estimators is an
# upper bound that train_model may lower after cross-validation.
xgb1 = XGBClassifier(learning_rate=0.1,
                    n_estimators=1000,
                    max_depth=5,
                    min_child_weight=1,
                    gamma=0,
                    subsample=0.8,
                    colsample_bytree=0.8,
                    objective='multi:softmax',
                    nthread=8,
                    scale_pos_weight=1,
                    seed=145)

train_model(xgb1, train_ohe, test_ohe, predictors)

Я получаю следующую ошибку AttributeError: в ней говорится, что объект «DataFrame» не имеет атрибута «num_row». Она возникает на вызове xgb.cv внутри функции train_model.

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-116-5933227c171d> in <module>
     18                     seed=145)
     19 print(xgb1.get_params())
---> 20 train_model(xgb1, train_ohe, test_ohe, predictors)
     21 # xgb_param = xgb1.get_params()
     22 # cv_folds=5

<ipython-input-114-a9df39c19abf> in train_model(algo, train, test, predictors, useTrainCV, cv_folds, early_stopping_rounds)
     19                            nfold=cv_folds,
     20                            metrics='f1_eval',
---> 21                           early_stopping_rounds=early_stopping_rounds)
     22         algo.set_params(n_estimators=cv_result.shape[0])
     23 

/opt/virtual_env/py3/lib/python3.6/site-packages/xgboost/training.py in cv(params, dtrain, num_boost_round, nfold, stratified, folds, metrics, obj, feval, maximize, early_stopping_rounds, fpreproc, as_pandas, verbose_eval, show_stdv, seed, callbacks, shuffle)
    413     results = {}
    414     cvfolds = mknfold(dtrain, nfold, params, seed, metrics, fpreproc,
--> 415                       stratified, folds, shuffle)
    416 
    417     # setup callbacks

/opt/virtual_env/py3/lib/python3.6/site-packages/xgboost/training.py in mknfold(dall, nfold, param, seed, evals, fpreproc, stratified, folds, shuffle)
    246         # Do standard k-fold cross validation
    247         if shuffle is True:
--> 248             idx = np.random.permutation(dall.num_row())
    249         else:
    250             idx = np.arange(dall.num_row())

/opt/virtual_env/py3/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
   4374             if self._info_axis._can_hold_identifiers_and_holds_name(name):
   4375                 return self[name]
-> 4376             return object.__getattribute__(self, name)
   4377 
   4378     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'num_row'    

person joel    schedule 21.01.2019    source источник


Ответы (1)


Увидел ваш пост, когда искал ту же ошибку.

Второй параметр, train, в вашем коде:

 cv_result = xgb.cv(xgb_param,
                           train,
                           num_boost_round=xgb_param['n_estimators'],
                           nfold=cv_folds,
                           metrics='f1_eval',
                          early_stopping_rounds=early_stopping_rounds)
        algo.set_params(n_estimators=cv_result.shape[0])

должен быть объектом DMatrix, например:

train = xgb.DMatrix(X_train, y_train)

Надеюсь, это поможет.

person Donny Soh    schedule 31.03.2019