Modeling for Default | Performance Summary

A summary of the models evaluated to date and their performance in modeling for default

Author

Adam Bushman

Published

October 29, 2024

library('tidyverse')
library('ggrepel')

Let’s read in all the results of the various models we’ve experimented with.

# Load the saved evaluation metrics of each model family; every experiment
# folder holds its own `model-results.csv`.
mcb1 <- read.csv(file = "majority-class-baseline/model-results.csv")
elas <- read.csv(file = "penalized-regression/model-results.csv")
svm <- read.csv(file = "support-vector-machine/model-results.csv")
rf <- read.csv(file = "random-forest/model-results.csv")

We then combine them into a single data frame we can use for plotting.

# Stack the per-model result tables into one data frame and convert it to a
# tibble for compact printing.
full_results <- as_tibble(bind_rows(mcb1, elas, svm, rf))

# Show the combined table.
full_results
# A tibble: 6 × 6
  model                      hyperparameters     Accuracy Precision Recall   AUC
  <chr>                      <chr>                  <dbl>     <dbl>  <dbl> <dbl>
1 Majority Class, Imbalanced None                   0.919     0.919  1     0.5  
2 Majority Class, Balanced   None                   0.509     0.509  1     0.5  
3 Elastic Net                Lambda: 0.00234711…    0.697     0.660  0.793 0.762
4 Support Vector Machine     Sigma: 0.1 C: 10       0.834     0.890  0.753 0.832
5 Random Forest, 1           Trees: 500, Depth:…    0.942     0.898  0.882 0.971
6 Random Forest, 2           Trees: 800, Depth:…    0.957     0.922  0.912 0.978

We need to pivot this into a long format. We’ll also wrap the model labels so they fit on the plot:

# Reshape to one row per model/metric pair: every column except the two
# identifier columns is stacked into `name` (the metric) and `value` (its
# score). Model names are then wrapped at 15 characters.
full_results_long <-
    full_results |>
    pivot_longer(
        cols = -c(model, hyperparameters),
        names_to = "name",
        values_to = "value"
    ) |>
    mutate(model = stringr::str_wrap(model, width = 15))

Let’s generate a basic comparison plot across the various measures.

# Compare every model on each metric: one facet per metric and one labelled
# point per model. The x position is a constant placeholder, so only the
# vertical position (the metric value) carries information.
ggplot(
    full_results_long,
    aes(x = "1", y = value, label = paste0(model, " (", round(value, 2), ")"))
) +
    geom_point(
        aes(color = model),
        show.legend = FALSE
    ) +
    geom_label_repel(
        aes(fill = model),
        size = 1.9,
        # Fixed seed: label repulsion is randomised, so without it the label
        # layout changes every time the report is rendered.
        seed = 42,
        show.legend = FALSE
    ) +
    # One panel per metric, laid out side by side
    facet_wrap(~name, nrow = 1) +
    labs(
        title = "Modeling for default",
        subtitle = "A summary of model performance"
    ) +
    theme_minimal() +
    theme(
        plot.title = element_text(face = "bold"),
        plot.subtitle = element_text(face = "italic"),
        # Axis titles and x tick labels are dropped: the facet strip names
        # the metric and the x value is only a placeholder.
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        strip.text = element_text(face = "bold", color = "white"),
        strip.background = element_rect(fill = "#707271", color = NA),
        panel.background = element_rect(fill = NA, color = "black"),
        panel.grid.major.x = element_blank()
    )