The documentation says that the default value of lambdarank_pair_method is mean, but the following code snippets show that the effective default is topk. The code below runs on version '2.1.0-dev'.
import numpy as np
import pandas as pd
import xgboost as xgb

np.random.seed(42)
n_groups = 1000
group_size = 2000
n_features = 100
n_levels = 20
rows = n_groups * group_size

# Random features; each consecutive block of `group_size` rows forms one query group.
features = pd.DataFrame(np.random.randn(rows, n_features).astype('float32'), columns=[f'f{i:03d}' for i in range(n_features)])
qids = pd.Series(np.arange(rows, dtype='int') // group_size)
# Rank random scores within each group, then bucket the ranks into `n_levels` relevance levels.
labels = pd.Series(np.random.randn(rows).astype('float32')).groupby(qids).rank(method='first').sub(1) // (group_size // n_levels)
dmatrix = xgb.DMatrix(features, label=labels, qid=qids)
Keep the default (lambdarank_pair_method not set):
params = {
'objective': 'rank:pairwise',
# 'objective': 'multi:softprob',
# 'num_class': n_levels,
# 'base_score': 0.5,
# 'lambdarank_pair_method': 'mean',
'lambdarank_normalization': False,
'lambdarank_num_pair_per_sample': 1,
'booster': 'gbtree',
'tree_method': 'hist',
'verbosity': 1,
'seed': 42,
'learning_rate': 0.1,
'max_depth': 6,
'gamma': 1,
'min_child_weight': 4,
'subsample': 0.9,
'colsample_bytree': 0.7,
'nthread': 20,
'reg_lambda': 1,
'reg_alpha': 1,
'eval_metric': ['ndcg@100', 'ndcg@500', 'ndcg@1000'],
}
booster = xgb.train(params, dmatrix, 100, verbose_eval=10, evals=[(dmatrix, 'train')])
# [0] train-ndcg@100:0.10113 train-ndcg@500:0.21286 train-ndcg@1000:0.36429
# [10] train-ndcg@100:0.10559 train-ndcg@500:0.21556 train-ndcg@1000:0.36704
# [20] train-ndcg@100:0.10740 train-ndcg@500:0.21668 train-ndcg@1000:0.36754
# [30] train-ndcg@100:0.10885 train-ndcg@500:0.21791 train-ndcg@1000:0.36867
# [40] train-ndcg@100:0.10967 train-ndcg@500:0.21847 train-ndcg@1000:0.36898
# [50] train-ndcg@100:0.11000 train-ndcg@500:0.21876 train-ndcg@1000:0.36956
# [60] train-ndcg@100:0.11101 train-ndcg@500:0.21930 train-ndcg@1000:0.36988
# [70] train-ndcg@100:0.11165 train-ndcg@500:0.21962 train-ndcg@1000:0.37038
# [80] train-ndcg@100:0.11218 train-ndcg@500:0.22016 train-ndcg@1000:0.37073
# [90] train-ndcg@100:0.11272 train-ndcg@500:0.22057 train-ndcg@1000:0.37113
# [99] train-ndcg@100:0.11303 train-ndcg@500:0.22097 train-ndcg@1000:0.37141
Set lambdarank_pair_method to topk explicitly:
params = {
'objective': 'rank:pairwise',
# 'objective': 'multi:softprob',
# 'num_class': n_levels,
# 'base_score': 0.5,
'lambdarank_pair_method': 'topk',
'lambdarank_normalization': False,
'lambdarank_num_pair_per_sample': 1,
'booster': 'gbtree',
'tree_method': 'hist',
'verbosity': 1,
'seed': 42,
'learning_rate': 0.1,
'max_depth': 6,
'gamma': 1,
'min_child_weight': 4,
'subsample': 0.9,
'colsample_bytree': 0.7,
'nthread': 20,
'reg_lambda': 1,
'reg_alpha': 1,
'eval_metric': ['ndcg@100', 'ndcg@500', 'ndcg@1000'],
}
booster = xgb.train(params, dmatrix, 100, verbose_eval=10, evals=[(dmatrix, 'train')])
# [0] train-ndcg@100:0.10113 train-ndcg@500:0.21286 train-ndcg@1000:0.36429
# [10] train-ndcg@100:0.10559 train-ndcg@500:0.21556 train-ndcg@1000:0.36704
# [20] train-ndcg@100:0.10740 train-ndcg@500:0.21668 train-ndcg@1000:0.36754
# [30] train-ndcg@100:0.10885 train-ndcg@500:0.21791 train-ndcg@1000:0.36867
# [40] train-ndcg@100:0.10967 train-ndcg@500:0.21847 train-ndcg@1000:0.36898
# [50] train-ndcg@100:0.11000 train-ndcg@500:0.21876 train-ndcg@1000:0.36956
# [60] train-ndcg@100:0.11101 train-ndcg@500:0.21930 train-ndcg@1000:0.36988
# [70] train-ndcg@100:0.11165 train-ndcg@500:0.21962 train-ndcg@1000:0.37038
# [80] train-ndcg@100:0.11218 train-ndcg@500:0.22016 train-ndcg@1000:0.37073
# [90] train-ndcg@100:0.11272 train-ndcg@500:0.22057 train-ndcg@1000:0.37113
# [99] train-ndcg@100:0.11303 train-ndcg@500:0.22097 train-ndcg@1000:0.37141
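The training logs above are identical to the default run. To double-check which value the library actually resolves, one can also dump the booster's internal configuration with save_config(); a minimal sketch (the exact JSON layout of the config dump varies between versions, so this just scans the whole tree for the key):

import json

def find_key(node, key, path=()):
    # Recursively walk the config tree and print every match with its path.
    if isinstance(node, dict):
        for k, v in node.items():
            if k == key:
                print('/'.join(path + (k,)), '=', v)
            find_key(v, key, path + (k,))
    elif isinstance(node, list):
        for i, v in enumerate(node):
            find_key(v, key, path + (str(i),))

find_key(json.loads(booster.save_config()), 'lambdarank_pair_method')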
Set lambdarank_pair_method to mean explicitly:
params = {
'objective': 'rank:pairwise',
# 'objective': 'multi:softprob',
# 'num_class': n_levels,
# 'base_score': 0.5,
'lambdarank_pair_method': 'mean',
'lambdarank_normalization': False,
'lambdarank_num_pair_per_sample': 1,
'booster': 'gbtree',
'tree_method': 'hist',
'verbosity': 1,
'seed': 42,
'learning_rate': 0.1,
'max_depth': 6,
'gamma': 1,
'min_child_weight': 4,
'subsample': 0.9,
'colsample_bytree': 0.7,
'nthread': 20,
'reg_lambda': 1,
'reg_alpha': 1,
'eval_metric': ['ndcg@100', 'ndcg@500', 'ndcg@1000'],
}
booster = xgb.train(params, dmatrix, 100, verbose_eval=10, evals=[(dmatrix, 'train')])
# [0] train-ndcg@100:0.10479 train-ndcg@500:0.21641 train-ndcg@1000:0.36769
# [10] train-ndcg@100:0.11850 train-ndcg@500:0.23082 train-ndcg@1000:0.38270
# [20] train-ndcg@100:0.12608 train-ndcg@500:0.23867 train-ndcg@1000:0.39229
# [30] train-ndcg@100:0.13020 train-ndcg@500:0.24464 train-ndcg@1000:0.39863
# [40] train-ndcg@100:0.13368 train-ndcg@500:0.24896 train-ndcg@1000:0.40384
# [50] train-ndcg@100:0.13506 train-ndcg@500:0.25252 train-ndcg@1000:0.40807
# [60] train-ndcg@100:0.13731 train-ndcg@500:0.25628 train-ndcg@1000:0.41222
# [70] train-ndcg@100:0.14007 train-ndcg@500:0.25916 train-ndcg@1000:0.41580
# [80] train-ndcg@100:0.14266 train-ndcg@500:0.26243 train-ndcg@1000:0.41901
# [90] train-ndcg@100:0.14480 train-ndcg@500:0.26488 train-ndcg@1000:0.42216
# [99] train-ndcg@100:0.14655 train-ndcg@500:0.26781 train-ndcg@1000:0.42499
Comparing the three runs: the default produces training logs identical to the explicit topk run, while the mean run diverges, so the effective default appears to be topk, contradicting the documentation.

One more thing: the default value of the lambdarank_normalization parameter mentioned in #9625 is True. To stay compatible with older versions, I think it should perhaps be False.
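Until the defaults are settled, a minimal workaround is to pin both values explicitly, so training does not depend on whichever defaults a given XGBoost version ships with (a sketch; whether 'mean' and False are the right choices depends on the behavior one wants to reproduce):

params.update({
    'lambdarank_pair_method': 'mean',   # the documented default
    'lambdarank_normalization': False,  # pre-2.0-style behavior
})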