# Load the modelling framework and the penguins data set.
library(tidymodels)
library(palmerpenguins)

# Build the working data set from the raw penguins table.
penguins <- palmerpenguins::penguins %>%
  drop_na() %>% # remove rows with missing values
  select(-c(year, sex, island)) # drop the year, sex and island columns

glimpse(penguins) # inspect the remaining variables
## Rows: 333## Columns: 5## $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Ade…## $ bill_length_mm <dbl> 39.1, 39.5, 40.3, 36.7, 39.3, 38.9, 39.2, 41.1, 38.…## $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, 19.3, 20.6, 17.8, 19.6, 17.6, 21.…## $ flipper_length_mm <int> 181, 186, 195, 193, 190, 181, 195, 182, 191, 198, 1…## $ body_mass_g <int> 3750, 3800, 3250, 3450, 3650, 3625, 4675, 3200, 380…
library(rsample)

set.seed(123) # fix the RNG seed before splitting
p_split <- initial_split(penguins, prop = 0.75) # 75% of the rows for training
p_train <- training(p_split)

p_split
## <Analysis/Assess/Total>
## <250/83/333>
# Cross-validation folds on the training set, stratified by species
p_folds <- p_train %>%
  vfold_cv(strata = species)
Estos son los datos de entrenamiento/prueba/total
# Create the preprocessing recipe. It is stored untrained: a workflow trains
# the recipe itself when it is fitted, and add_recipe() expects a recipe that
# has not been through prep().
recipe_dt <- recipe(species ~ ., data = p_train) %>%
  step_corr(all_predictors()) %>% # filter out highly correlated predictors
  step_center(all_predictors(), -all_outcomes()) %>% # centering
  step_scale(all_predictors(), -all_outcomes()) # scaling

# Train a throwaway copy only to display what each step estimated.
prep(recipe_dt)
## Data Recipe## ## Inputs:## ## role #variables## outcome 1## predictor 4## ## Training data contained 250 data points and no missing data.## ## Operations:## ## Correlation filter removed no terms [trained]## Centering for bill_length_mm, ... [trained]## Scaling for bill_length_mm, ... [trained]
Modelo de árboles de decisión
Vamos a utilizar el modelo por defecto
# Specify the model
set.seed(123)
vanilla_tree_spec <- decision_tree() %>% # decision tree
  set_engine("rpart") %>% # fitted through the rpart package
  set_mode("classification") # used for classification

vanilla_tree_spec
## Decision Tree Model Specification (classification)
## 
## Computational engine: rpart
# Assemble the workflow
tree_wf <- workflow() %>%
  add_recipe(recipe_dt) %>% # attach the recipe
  add_model(vanilla_tree_spec) # attach the model

tree_wf
## ══ Workflow ═══════════════════════════════════════════════════════════════════════════════════════════## Preprocessor: Recipe## Model: decision_tree()## ## ── Preprocessor ───────────────────────────────────────────────────────────────────────────────────────## 3 Recipe Steps## ## ● step_corr()## ● step_center()## ● step_scale()## ## ── Model ──────────────────────────────────────────────────────────────────────────────────────────────## Decision Tree Model Specification (classification)## ## Computational engine: rpart
# Vanilla model, no tuning
set.seed(123)
vanilla_tree_spec %>%
  fit_resamples(species ~ ., resamples = p_folds) %>%
  collect_metrics() # unnest the metrics
## # A tibble: 2 x 5## .metric .estimator mean n std_err## <chr> <chr> <dbl> <int> <dbl>## 1 accuracy multiclass 0.953 10 0.0154## 2 roc_auc hand_till 0.946 10 0.0197
set.seed(123)

# Decision tree with explicitly chosen hyperparameters
trees_spec <- decision_tree() %>%
  set_engine("rpart") %>%
  set_mode("classification") %>%
  set_args(min_n = 20, cost_complexity = 0.1) # set the hyperparameters

# Resample it on the same folds and summarise the metrics
trees_spec %>%
  fit_resamples(species ~ ., resamples = p_folds) %>%
  collect_metrics()
## # A tibble: 2 x 5## .metric .estimator mean n std_err## <chr> <chr> <dbl> <int> <dbl>## 1 accuracy multiclass 0.953 10 0.0154## 2 roc_auc hand_till 0.946 10 0.0197
05:00
# Use last_fit() together with the workflow and the data split
final_fit_dt <- tree_wf %>%
  last_fit(split = p_split)

final_fit_dt %>%
  collect_metrics()
## # A tibble: 2 x 3## .metric .estimator .estimate## <chr> <chr> <dbl>## 1 accuracy multiclass 0.916## 2 roc_auc hand_till 0.929
# Confusion matrix of the collected predictions
final_fit_dt %>%
  collect_predictions() %>%
  conf_mat(truth = species, estimate = .pred_class)
## Truth## Prediction Adelie Chinstrap Gentoo## Adelie 33 2 0## Chinstrap 2 16 0## Gentoo 2 1 27
# Confusion matrix of the collected predictions
final_fit_dt %>%
  collect_predictions() %>%
  conf_mat(truth = species, estimate = .pred_class)
## Truth## Prediction Adelie Chinstrap Gentoo## Adelie 33 2 0## Chinstrap 2 16 0## Gentoo 2 1 27
# Overall sensitivity of the model
final_fit_dt %>%
  collect_predictions() %>%
  sens(truth = species, estimate = .pred_class)
## # A tibble: 1 x 3## .metric .estimator .estimate## <chr> <chr> <dbl>## 1 sens macro 0.911
05:00
Dividimos los datos
Preprocesamiento de los datos
Especificamos el modelo y sus args
Armamos el workflow con la receta y el modelo
Tuneo de los hiperparámetros
Predicción y comparación de las métricas
# Preprocessing recipe built from the training portion of the split:
# correlation filter, then centering and scaling of the predictors.
# NOTE(review): the recipe is trained with prep() here and later handed to
# add_recipe(); workflows document that they expect an untrained recipe.
# It is kept trained because juice(p_recipe) is called further down. Confirm
# against the installed workflows version.
p_recipe <- recipe(species ~ ., data = training(p_split)) %>%
  step_corr(all_predictors()) %>%
  step_center(all_predictors(), -all_outcomes()) %>%
  step_scale(all_predictors(), -all_outcomes()) %>%
  prep()

p_recipe
## Data Recipe## ## Inputs:## ## role #variables## outcome 1## predictor 4## ## Training data contained 250 data points and no missing data.## ## Operations:## ## Correlation filter removed no terms [trained]## Centering for bill_length_mm, ... [trained]## Scaling for bill_length_mm, ... [trained]
# Random forest specification with default arguments, using ranger
rf_spec <- rand_forest() %>%
  set_engine("ranger") %>%
  set_mode("classification")
# Resample the default random forest and summarise the metrics
set.seed(123)
rf_spec %>%
  fit_resamples(species ~ ., resamples = p_folds) %>%
  collect_metrics()
## # A tibble: 2 x 5## .metric .estimator mean n std_err## <chr> <chr> <dbl> <int> <dbl>## 1 accuracy multiclass 0.972 10 0.0108 ## 2 roc_auc hand_till 0.996 10 0.00192
# Same forest with mtry = 2
rf2_spec <- rf_spec %>%
  set_args(mtry = 2)

set.seed(123)
rf2_spec %>%
  fit_resamples(species ~ ., resamples = p_folds) %>%
  collect_metrics()
## # A tibble: 2 x 5## .metric .estimator mean n std_err## <chr> <chr> <dbl> <int> <dbl>## 1 accuracy multiclass 0.972 10 0.0108 ## 2 roc_auc hand_till 0.996 10 0.00192
# Same forest with mtry = 3
rf3_spec <- rf_spec %>%
  set_args(mtry = 3)

set.seed(123)
rf3_spec %>%
  fit_resamples(species ~ ., resamples = p_folds) %>%
  collect_metrics()
## # A tibble: 2 x 5## .metric .estimator mean n std_err## <chr> <chr> <dbl> <int> <dbl>## 1 accuracy multiclass 0.967 10 0.0104 ## 2 roc_auc hand_till 0.997 10 0.00115
# Same forest with mtry = 4
rf4_spec <- rf_spec %>%
  set_args(mtry = 4)

set.seed(123)
rf4_spec %>%
  fit_resamples(species ~ ., resamples = p_folds) %>%
  collect_metrics()
## # A tibble: 2 x 5## .metric .estimator mean n std_err## <chr> <chr> <dbl> <int> <dbl>## 1 accuracy multiclass 0.964 10 0.00972## 2 roc_auc hand_till 0.996 10 0.00147
# Random forest specification with mtry and min_n marked for tuning;
# the number of trees is fixed at 1000.
tune_spec <- rand_forest(
  mtry = tune(),
  trees = 1000,
  min_n = tune()
) %>%
  set_mode("classification") %>%
  set_engine("ranger")

tune_spec
## Random Forest Model Specification (classification)## ## Main Arguments:## mtry = tune()## trees = 1000## min_n = tune()## ## Computational engine: ranger
# Workflow combining the recipe with the tunable model specification
tune_wf <- workflow() %>%
  add_recipe(p_recipe) %>%
  add_model(tune_spec)

# Stratified cross-validation folds used for tuning
set.seed(123)
cv_folds <- p_train %>%
  vfold_cv(strata = species)

tune_wf
## ══ Workflow ═══════════════════════════════════════════════════════════════════════════════════════════## Preprocessor: Recipe## Model: rand_forest()## ## ── Preprocessor ───────────────────────────────────────────────────────────────────────────────────────## 3 Recipe Steps## ## ● step_corr()## ● step_center()## ● step_scale()## ## ── Model ──────────────────────────────────────────────────────────────────────────────────────────────## Random Forest Model Specification (classification)## ## Main Arguments:## mtry = tune()## trees = 1000## min_n = tune()## ## Computational engine: ranger
# Register a parallel backend before tuning
doParallel::registerDoParallel()

# Tune the workflow over the folds with grid = 20
set.seed(123)
tune_res <- tune_grid(
  tune_wf,
  resamples = cv_folds,
  grid = 20
)

tune_res
## # Tuning results## # 10-fold cross-validation using stratification ## # A tibble: 10 x 4## splits id .metrics .notes ## <list> <chr> <list> <list> ## 1 <split [224/26]> Fold01 <tibble [40 × 6]> <tibble [0 × 1]>## 2 <split [224/26]> Fold02 <tibble [40 × 6]> <tibble [0 × 1]>## 3 <split [224/26]> Fold03 <tibble [40 × 6]> <tibble [0 × 1]>## 4 <split [224/26]> Fold04 <tibble [40 × 6]> <tibble [0 × 1]>## 5 <split [224/26]> Fold05 <tibble [40 × 6]> <tibble [0 × 1]>## 6 <split [225/25]> Fold06 <tibble [40 × 6]> <tibble [0 × 1]>## 7 <split [225/25]> Fold07 <tibble [40 × 6]> <tibble [0 × 1]>## 8 <split [226/24]> Fold08 <tibble [40 × 6]> <tibble [0 × 1]>## 9 <split [227/23]> Fold09 <tibble [40 × 6]> <tibble [0 × 1]>## 10 <split [227/23]> Fold10 <tibble [40 × 6]> <tibble [0 × 1]>
# Pick the hyperparameter combination with the best ROC AUC. The metric is
# passed by name: recent tune releases place `...` before `metric`, so a
# positional "roc_auc" is no longer accepted there.
best_auc <- select_best(tune_res, metric = "roc_auc")

# Plug the selected values into the tunable specification
final_rf <- finalize_model(
  tune_spec,
  best_auc
)

final_rf
## Random Forest Model Specification (classification)## ## Main Arguments:## mtry = 4## trees = 1000## min_n = 4## ## Computational engine: ranger
set.seed(123)

# Workflow with the recipe and the finalized random forest
final_wf <- workflow() %>%
  add_recipe(p_recipe) %>%
  add_model(final_rf)

# Run last_fit() on the data split and collect its metrics
final_res <- last_fit(final_wf, p_split)

final_res %>%
  collect_metrics()
## # A tibble: 2 x 3## .metric .estimator .estimate## <chr> <chr> <dbl>## 1 accuracy multiclass 0.952## 2 roc_auc hand_till 0.999
# Confusion matrix of the final random forest predictions
final_res %>%
  collect_predictions() %>%
  conf_mat(truth = species, estimate = .pred_class)
## Truth## Prediction Adelie Chinstrap Gentoo## Adelie 39 0 1## Chinstrap 2 11 1## Gentoo 0 0 29
10:00
Preprocesamiento de los datos
Especificamos el modelo y sus args
Armamos el workflow con la receta y el modelo
Tuneo de los hiperparámetros
Predicción y comparación de las métricas
library(vip)

# Refit the finalized forest with permutation importance enabled on the
# data returned by juice(p_recipe), then plot variable importance as points.
# NOTE(review): theme_xaringan() is not loaded anywhere in the visible code;
# presumably xaringanthemer is attached in a setup chunk. Confirm.
set.seed(123)
final_rf %>%
  set_engine("ranger", importance = "permutation") %>%
  fit(species ~ ., data = juice(p_recipe)) %>%
  vip(geom = "point") +
  theme_xaringan()
tidymodels
# Load the modelling framework and the penguins data set.
library(tidymodels)
library(palmerpenguins)

# Build the working data set from the raw penguins table.
penguins <- palmerpenguins::penguins %>%
  drop_na() %>% # remove rows with missing values
  select(-c(year, sex, island)) # drop the year, sex and island columns

glimpse(penguins) # inspect the remaining variables
## Rows: 333## Columns: 5## $ species <fct> Adelie, Adelie, Adelie, Adelie, Adelie, Adelie, Ade…## $ bill_length_mm <dbl> 39.1, 39.5, 40.3, 36.7, 39.3, 38.9, 39.2, 41.1, 38.…## $ bill_depth_mm <dbl> 18.7, 17.4, 18.0, 19.3, 20.6, 17.8, 19.6, 17.6, 21.…## $ flipper_length_mm <int> 181, 186, 195, 193, 190, 181, 195, 182, 191, 198, 1…## $ body_mass_g <int> 3750, 3800, 3250, 3450, 3650, 3625, 4675, 3200, 380…
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Alt + f | Fit Slides to Screen |
Esc | Back to slideshow |