R/extract_importance_xgboost.R
extract_importance_xgboost.Rd
Extract the individual-algorithm extrinsic importance from an xgboost object, along with the importance rank.
extract_importance_xgboost(fit = NULL, feature_names = "", coef = 0)
fit: the xgboost object.
feature_names: the feature names
coef: the Super Learner coefficient associated with the learner.
a tibble, with columns "algorithm" (the fitted algorithm), "feature" (the feature), "importance" (the algorithm-specific extrinsic importance of the feature), "rank" (the feature importance rank, with 1 indicating the most important feature), and "weight" (the algorithm's weight in the Super Learner)
# \donttest{
# Worked example: fit a small xgboost classifier on the biomarkers data and
# extract its feature importance with extract_importance_xgboost().
data("biomarkers")
# subset to complete cases for illustration
cc <- complete.cases(biomarkers)
dat_cc <- biomarkers[cc, ]
# use only the mucinous outcome, not the high-malignancy outcome
y <- dat_cc$mucinous
x <- as.matrix(dat_cc[, !(names(dat_cc) %in% c("mucinous", "high_malignancy"))])
# x is a matrix, so its feature names live in colnames(x); names(x) on a
# matrix is NULL and would pass no feature names to the extractor
feature_nms <- colnames(x)
set.seed(20231129)
xgbmat <- xgboost::xgb.DMatrix(data = x, label = y)
# get the fit, using a small number of rounds for illustration only
fit <- xgboost::xgboost(
  data = xgbmat,
  objective = "binary:logistic",
  nthread = 1,
  nrounds = 10
)
#> [1] train-logloss:0.600973
#> [2] train-logloss:0.509666
#> [3] train-logloss:0.435764
#> [4] train-logloss:0.376934
#> [5] train-logloss:0.331400
#> [6] train-logloss:0.295142
#> [7] train-logloss:0.265113
#> [8] train-logloss:0.243968
#> [9] train-logloss:0.222082
#> [10] train-logloss:0.203988
# extract importance (coef is left at its default of 0, hence weight = 0 below)
importance <- extract_importance_xgboost(
  fit = fit,
  feature_names = feature_nms
)
# NOTE(review): the output below was recorded when feature_nms was NULL;
# re-render the example to confirm the table is unchanged now that the real
# feature names are supplied
importance
#> # A tibble: 14 × 5
#> algorithm feature importance rank weight
#> <chr> <chr> <dbl> <int> <dbl>
#> 1 xgboost lab3_muc3ac_score 0.177 1 0
#> 2 xgboost lab2_fluorescence_score 0.130 2 0
#> 3 xgboost lab6_ab_score 0.110 3 0
#> 4 xgboost lab1_telomerase_score 0.0969 4 0
#> 5 xgboost lab4_glucose_score 0.0853 5 0
#> 6 xgboost lab4_areg_score 0.0851 6 0
#> 7 xgboost lab1_molecules_score 0.0739 7 0
#> 8 xgboost lab1_actb 0.0697 8 0
#> 9 xgboost lab3_muc5ac_score 0.0588 9 0
#> 10 xgboost cea 0.0520 10 0
#> 11 xgboost lab5_neoplasia_v2_call 0.0237 11 0
#> 12 xgboost institution 0.0208 12 0
#> 13 xgboost lab5_neoplasia_v1_call 0.0110 13 0
#> 14 xgboost lab5_mucinous_call 0.00605 14 0
# }