Gallery showing various tables possible with the {gtsummary} package. If you have created an interesting table using {gtsummary}, please submit it to the gallery via a pull request to the GitHub repository.
library(gtsummary); library(gt); library(survival)
library(dplyr); library(stringr); library(purrr); library(forcats)
Add a spanning header over the group columns for increased clarity, reporting the number of non-missing observations, and modifying column headers.
trial[c("trt", "age", "grade")] %>%
tbl_summary(
by = trt,
missing = "no",
statistic = all_continuous() ~ "{median} ({p25}, {p75}) [N = {N_nonmiss}]"
) %>%
modify_header(stat_by = md("**{level}**<br>N = {n} ({style_percent(p)}%)")) %>%
add_n() %>%
bold_labels() %>%
modify_spanning_header(starts_with("stat_") ~ "**Chemotherapy Treatment**")
Characteristic | N | Chemotherapy Treatment | |
---|---|---|---|
Drug A N = 98 (49%)1 |
Drug B N = 102 (51%)1 |
||
Age, yrs | 189 | 46 (37, 59) [N = 91] | 48 (39, 56) [N = 98] |
Grade | 200 | ||
I | 35 (36%) | 33 (32%) | |
II | 32 (33%) | 36 (35%) | |
III | 31 (32%) | 33 (32%) | |
1
Statistics presented: median (IQR) [N = N]; n (%)
|
Modify the function that formats the p-values, change variable labels, updating tumor response header, and add a correction for multiple testing.
trial[!is.na(trial$response), c("response", "age", "grade")] %>%
mutate(response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))) %>%
tbl_summary(
by = response,
missing = "no",
label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
) %>%
add_p(pvalue_fun = partial(style_pvalue, digits = 2)) %>%
add_q()
#> Adjusting p-values with
#> `stats::p.adjust(x$table_body$p.value, method = "fdr")`
Characteristic | No Tumor Response, N = 1321 | Tumor Responded, N = 611 | p-value2 | q-value3 |
---|---|---|---|---|
Patient Age | 46 (36, 55) | 49 (43, 59) | 0.091 | 0.18 |
Tumor Grade | 0.93 | 0.93 | ||
I | 46 (35%) | 21 (34%) | ||
II | 44 (33%) | 19 (31%) | ||
III | 42 (32%) | 21 (34%) | ||
1
Statistics presented: median (IQR); n (%)
2
Statistical tests performed: Wilcoxon rank-sum test; chi-square test of independence
3
False discovery rate correction for multiple testing
|
Include missing tumor response as column using fct_explicit_na()
.
trial[c("response", "age", "grade")] %>%
mutate(
response = factor(response, labels = c("No Tumor Response", "Tumor Responded")) %>%
fct_explicit_na(na_level = "Missing Response Status")
) %>%
tbl_summary(
by = response,
label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
)
Characteristic | No Tumor Response, N = 1321 | Tumor Responded, N = 611 | Missing Response Status, N = 71 |
---|---|---|---|
Patient Age | 46 (36, 55) | 49 (43, 59) | 52 (44, 57) |
Unknown | 7 | 3 | 1 |
Tumor Grade | |||
I | 46 (35%) | 21 (34%) | 1 (14%) |
II | 44 (33%) | 19 (31%) | 5 (71%) |
III | 42 (32%) | 21 (34%) | 1 (14%) |
1
Statistics presented: median (IQR); n (%)
|
Include p-values comparing all groups to a single reference group.
# table summarizing data with no p-values
t0 <- trial %>%
select(grade, age, response) %>%
tbl_summary(by = grade, missing = "no") %>%
modify_header(stat_by = md("**{level}**"))
# table comparing grade I and II
t1 <- trial %>%
select(grade, age, response) %>%
filter(grade %in% c("I", "II")) %>%
tbl_summary(by = grade, missing = "no") %>%
add_p() %>%
modify_header(p.value ~ md("**I vs. II**"))
# table comparing grade I and II
t2 <- trial %>%
select(grade, age, response) %>%
filter(grade %in% c("I", "III")) %>%
tbl_summary(by = grade, missing = "no") %>%
add_p() %>%
modify_header(p.value ~ md("**I vs. III**"))
# merging the 3 tables together, and adding additional gt formatting
tbl_merge(list(t0, t1, t2)) %>%
modify_spanning_header(
list(
starts_with("stat_") ~ "**Tumor Grade**",
starts_with("p.value") ~ "**p-value**"
)
) %>%
as_gt(include = -tab_spanner) %>%
# hiding repeated summary columns
cols_hide(columns = vars(stat_1_2, stat_2_2, stat_3_2, stat_1_3, stat_2_3, stat_3_3))
Characteristic | I1 | II1 | III1 | I vs. II2 | I vs. III2 |
---|---|---|---|---|---|
Age, yrs | 47 (37, 56) | 48 (37, 57) | 47 (38, 58) | 0.7 | 0.5 |
Tumor Response | 21 (31%) | 19 (30%) | 21 (33%) | >0.9 | 0.9 |
1
Statistics presented: median (IQR); n (%)
2
Statistical tests performed: Wilcoxon rank-sum test; Fisher's exact test
|
Add additional statistics as additional columns.
# define function for lower and upper bounds of the mean CI
ll <- function(x) t.test(x)$conf.int[1]
ul <- function(x) t.test(x)$conf.int[2]
t1 <-
trial %>%
select(age, marker) %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", missing = "no") %>%
modify_header(stat_0 ~ "**Mean (SD)**")
t2 <-
trial %>%
select(age, marker) %>%
tbl_summary(statistic = all_continuous() ~ "{ll}, {ul}", missing = "no") %>%
modify_header(stat_0 ~ "**95% CI for Mean**")
tbl_merge(list(t1, t2)) %>%
modify_footnote(everything() ~ NA_character_) %>%
modify_spanning_header(everything() ~ NA_character_)
Characteristic | Mean (SD) | 95% CI for Mean |
---|---|---|
Age, yrs | 47 (14) | 45, 49 |
Marker Level, ng/mL | 0.92 (0.86) | 0.79, 1.04 |
Include number of observations and the number of events in a univariate regression table.
trial[c("response", "age", "grade")] %>%
tbl_uvregression(
method = glm,
y = response,
method.args = list(family = binomial),
exponentiate = TRUE
) %>%
add_nevent()
Characteristic | N | Event N | OR1 | 95% CI1 | p-value |
---|---|---|---|---|---|
Age, yrs | 183 | 58 | 1.02 | 1.00, 1.04 | 0.10 |
Grade | 193 | 61 | |||
I | — | — | |||
II | 0.95 | 0.45, 2.00 | 0.9 | ||
III | 1.10 | 0.52, 2.29 | 0.8 | ||
1
OR = Odds Ratio, CI = Confidence Interval
|
Include two related models side-by-side with descriptive statistics.
gt_r1 <- glm(response ~ age + trt, trial, family = binomial) %>%
tbl_regression(exponentiate = TRUE)
gt_r2 <- coxph(Surv(ttdeath, death) ~ age + trt, trial) %>%
tbl_regression(exponentiate = TRUE)
gt_t1 <- trial[c("age", "trt")] %>% tbl_summary(missing = "no") %>% add_n()
tbl_merge(
list(gt_t1, gt_r1, gt_r2),
tab_spanner = c("**Summary Statistics**", "**Tumor Response**", "**Time to Death**")
)
Characteristic | Summary Statistics | Tumor Response | Time to Death | |||||
---|---|---|---|---|---|---|---|---|
N | N = 2001 | OR2 | 95% CI2 | p-value | HR2 | 95% CI2 | p-value | |
Age, yrs | 189 | 47 (38, 57) | 1.02 | 1.00, 1.04 | 0.095 | 1.01 | 0.99, 1.02 | 0.4 |
Chemotherapy Treatment | 200 | |||||||
Drug A | 98 (49%) | — | — | — | — | |||
Drug B | 102 (51%) | 1.13 | 0.60, 2.13 | 0.7 | 1.31 | 0.89, 1.93 | 0.2 | |
1
Statistics presented: median (IQR); n (%)
2
OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio
|
Include the number of events at each level of a categorical predictor.
gt_model <-
trial[c("ttdeath", "death", "stage", "grade")] %>%
tbl_uvregression(
method = coxph,
y = Surv(ttdeath, death),
exponentiate = TRUE,
hide_n = TRUE
)
gt_eventn <-
trial %>%
filter(death == 1) %>%
select(stage, grade) %>%
tbl_summary(
statistic = all_categorical() ~ "{n}",
label = list(stage ~ "T Stage", grade ~ "Grade")
) %>%
modify_header(stat_0 ~ "**Event N**") %>%
modify_footnote(everything() ~ NA_character_)
tbl_merge(list(gt_eventn, gt_model)) %>%
bold_labels() %>%
italicize_levels() %>%
modify_spanning_header(everything() ~ NA_character_)
Characteristic | Event N | HR1 | 95% CI1 | p-value |
---|---|---|---|---|
T Stage | ||||
T1 | 24 | — | — | |
T2 | 27 | 1.18 | 0.68, 2.04 | 0.6 |
T3 | 22 | 1.23 | 0.69, 2.20 | 0.5 |
T4 | 39 | 2.48 | 1.49, 4.14 | <0.001 |
Grade | ||||
I | 33 | — | — | |
II | 36 | 1.28 | 0.80, 2.05 | 0.3 |
III | 43 | 1.69 | 1.07, 2.66 | 0.024 |
1
HR = Hazard Ratio, CI = Confidence Interval
|
Regression model where the covariate remains the same, and the outcome changes.
tbl_reg <-
trial[c("age", "marker", "trt")] %>%
tbl_uvregression(
method = lm,
x = trt,
show_single_row = "trt",
hide_n = TRUE
) %>%
modify_header(list(
label ~"**Model Outcome**",
estimate ~ "**Treatment Coef.**"
))
tbl_reg %>%
modify_footnote(estimate ~ "Values larger than 0 indicate larger values in the Drug group.")
Model Outcome | Treatment Coef.1 | 95% CI2 | p-value |
---|---|---|---|
Age, yrs | 0.44 | -3.7, 4.6 | 0.8 |
Marker Level, ng/mL | -0.20 | -0.44, 0.05 | 0.12 |
1
Values larger than 0 indicate larger values in the Drug group.
2
CI = Confidence Interval
|
Add descriptive statistics by treatment group to the table above to produce a table often reported two group comparisons.
gt_sum <-
trial[c("age", "marker", "trt")] %>%
mutate(trt = fct_rev(trt)) %>%
tbl_summary(by = trt,
statistic = all_continuous() ~ "{mean} ({sd})",
missing = "no") %>%
add_n() %>%
modify_header(stat_by = md("**{level}**"))
tbl_merge(list(gt_sum, tbl_reg)) %>%
modify_header(estimate_2 ~ "**Difference**") %>%
modify_spanning_header(everything() ~ NA_character_)
Characteristic | N | Drug B1 | Drug A1 | Difference | 95% CI2 | p-value |
---|---|---|---|---|---|---|
Age, yrs | 189 | 47 (14) | 47 (15) | 0.44 | -3.7, 4.6 | 0.8 |
Marker Level, ng/mL | 190 | 0.82 (0.83) | 1.02 (0.89) | -0.20 | -0.44, 0.05 | 0.12 |
1
Statistics presented: mean (SD)
2
CI = Confidence Interval
|
Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default()
.
my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) {
dplyr::bind_cols(
broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
# calculate the confidence intervals, and save them in a tibble
stats::confint.default(x) %>%
tibble::as_tibble() %>%
rlang::set_names(c("conf.low", "conf.high")) )
}
lm(age ~ grade + marker, trial) %>%
tbl_regression(tidy_fun = my_tidy)
Characteristic | Beta | 95% CI1 | p-value |
---|---|---|---|
Grade | |||
I | — | — | |
II | 0.64 | -4.6, 5.9 | 0.8 |
III | 2.4 | -2.8, 7.6 | 0.4 |
Marker Level, ng/mL | -0.04 | -2.6, 2.5 | >0.9 |
1
CI = Confidence Interval
|