Code
# TODO: add devel/tidy_rmsfit stuff to rcookbook
library(ggplot2)
library(gridExtra)
library(reshape2)
library(swt)
library(readxl)
Statistical report
This cookbook demonstrates how to use the Swisstransplant R package swt
to make high-quality reports and publication-ready graphics in our in-house style. It also provides links to important resources and documentation.
More details can be found in the swt package documentation.
# TODO: add devel/tidy_rmsfit stuff to rcookbook
library(ggplot2)
library(gridExtra)
library(reshape2)
library(swt)
library(readxl)
The swt
package contains the Swisstransplant statistical report as a Quarto project template. Quarto documents enable dynamic reporting and unite written text, statistical programming code, results, tables, and figures into a reproducible document. All the steps for obtaining the results from the data are precisely specified. This enables transparency, reproducibility, and trust in statistical analyses.
A new project in RStudio can be created using the Swisstransplant project template: Click New Project… then choose New Directory, and select the Swisstransplant Project Template at the bottom of the list.
The Swisstransplant Quarto template is written in markdown; for a guide to markdown basics, see https://quarto.org/docs/authoring/markdown-basics.html. It is possible to add callout blocks https://quarto.org/docs/authoring/callouts.html to highlight important information.
::: {.callout-note}
:
Note that there are five types of callouts, including`note`, `warning`, `important`, `tip`, and `caution`.
:::
Note that there are five types of callouts: note
, warning
, important
, tip
, and caution
.
Something important.
The new project will suggest a structure for an analysis pipeline. However, every project is different; therefore, only a rough structure is suggested.
The swt_colors()
function creates an object for user-friendly access to the Swisstransplant color scheme:
= swt::swt_colors()
col $blue.dark col
The following colors are available:
= swt_colors()
col par(mfrow=c(1,1), mai=c(0.5,0.1,0.2,0.1))
barplot(rep(1,12), axes=FALSE, col=c(col$blue.dark,
$blue.alt,
col$turkis.tpx,
col$yellow.donation,
col$strongred.akzent,
col$pink.heart,
col$red.liver,
col$darkyellow.kidney,
col$green.pancreas,
col$lightblue.lungs,
col$beige.intestine,
col$purple.alt
col
),names.arg = c("blue\ndark", "blue\nalt", "turkis\ntpx", "yellow\ndonat",
"strongr\nakzent", "pink\nheart", "red\nliver", "darkylw\nkidney",
"green\npancr", "lightb\nlungs", "beige\nintest", "purple\nalt"),
cex.names = 0.8
)
The swt_color
object also includes single hue palettes with three additional color strength:, 75%, 50%, and 25%.
par(mfrow=c(12,1), mai=c(0.1,0.1,0.2,0.1))
barplot(rep(1,4), axes=FALSE, col=c(col$pal.blue.swt[1],
$pal.blue.swt[2],
col$pal.blue.swt[3],
col$pal.blue.swt[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.blue.alt[1],
$pal.blue.alt[2],
col$pal.blue.alt[3],
col$pal.blue.alt[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.turkis.tpx[1],
$pal.turkis.tpx[2],
col$pal.turkis.tpx[3],
col$pal.turkis.tpx[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.yellow.donation[1],
$pal.yellow.donation[2],
col$pal.yellow.donation[3],
col$pal.yellow.donation[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.strongred.akzent[1],
$pal.strongred.akzent[2],
col$pal.strongred.akzent[3],
col$pal.strongred.akzent[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.pink.heart[1],
$pal.pink.heart[2],
col$pal.pink.heart[3],
col$pal.pink.heart[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.red.liver[1],
$pal.red.liver[2],
col$pal.red.liver[3],
col$pal.red.liver[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.darkyellow.kidney[1],
$pal.darkyellow.kidney[2],
col$pal.darkyellow.kidney[3],
col$pal.darkyellow.kidney[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.green.pancreas[1],
$pal.green.pancreas[2],
col$pal.green.pancreas[3],
col$pal.green.pancreas[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.lightblue.lungs[1],
$pal.lightblue.lungs[2],
col$pal.lightblue.lungs[3],
col$pal.lightblue.lungs[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.beige.intestine[1],
$pal.beige.intestine[2],
col$pal.beige.intestine[3],
col$pal.beige.intestine[4])
col
)
barplot(rep(1,4), axes=FALSE, col=c(col$pal.purple.alt[1],
$pal.purple.alt[2],
col$pal.purple.alt[3],
col$pal.purple.alt[4]),
colnames.arg = c("100%", "75%", "50%", "25%"),
cex.names = 0.8
)
ggplot2
Below are a few examples for creating various types of data plots in the SWT color scheme. The function swt_style()
adds the correct styling to the plot.
set.seed(1980)
= 100
n = c(rnorm(n/2, mean=0), rnorm(n/2, mean=3) )
var1 = data.frame(var1 = var1,
d var2 = var1 + rnorm(n, sd = 0.4),
group = as.factor(rep(c("abc", "mno" ), each=n/2))
)
= ggplot(d, aes(x=group, y=var1, group=group, col=group)) +
p1 geom_boxplot(fill=col$grey.bg) +
geom_point(size=2, shape=1, position = position_jitter(height = 0, width = 0.25),
col=c(rep(col$pal.blue.swt[1], n/2),
rep(col$pal.red.liver[1], n/2))) +
labs(title = "Title", tag = "A") +
scale_color_manual(values = c(col$blue.swt,
$red.liver)) +
colswt_style(legend_position = "none", grey_theme = TRUE, font_size = 14, title_size = 16) +
theme(plot.tag = element_text(size = 16, face = "bold"))
= ggplot(d, aes(x=group, y=var1, group=group, col=group)) +
p2 geom_boxplot() +
geom_point(size=2, position = position_jitter(height = 0, width = 0.25),
col=c(rep(col$pal.turkis.tpx[3], n/2),
rep(col$pal.darkyellow.kidney[3], n/2))) +
scale_color_manual(values = c(col$turkis.tpx,
$darkyellow.kidney)) +
colswt_style(legend_position = "none")
= ggplot(d, aes(x=var2, fill=group)) +
p3 geom_histogram(position = "identity", bins=20, alpha=0.5) +
scale_fill_manual(values = c(col$lightblue.lungs,
$beige.intestine)) +
colswt_style(grey_theme = FALSE)
= ggplot(d, aes(x=var2, y=var1, group=group, col=group)) +
p4 geom_point(size=2, alpha=0.5) +
scale_color_manual(values = c(col$blue.swt,
$yellow.donation)) +
collabs(title = "Title", tag = "D") +
swt_style(legend_position = "bottom", grey_theme = TRUE)
grid.arrange(p1, p2, p3, p4, nrow = 2, ncol = 2)
# Data taken from Annual Report 2020 (p. 32)
.2 = t(array(c(
table313.3,17.2,18.6,18.4,17.0,
11.5,12.6,14.9,11.7,11.2,
1.8,4.6,3.8,6.7,5.8), dim = c(5,3)))
colnames(table3.2) = 2016:2020
rownames(table3.2) = c("Total", "DBD", "DCD")
.2 = melt(table3.2, varnames = c("Gruppe", "Jahr"), value.name = "Anzahl") data3
= -0.8
number_height
ggplot(data3.2, aes(x=Jahr, y=Anzahl, col=Gruppe, group=Gruppe)) +
# plot line with numbers
geom_line(data = data3.2, linewidth=1) +
geom_text(data = data3.2, aes(label=Anzahl), vjust=number_height,
col="black", size=4) +
# some adjustments (colors, axes, etc)
scale_color_manual(values=c(col$strongred.akzent,
$yellow.donation,
col$blue.swt)) +
colscale_y_continuous(breaks = seq(0,22,4), limits = c(0,22)) +
ylab("pmp") +
labs(title="Title",
subtitle = "Subtitle"
+
) swt_style(grey_theme = TRUE) +
theme(legend.position = "top")
# Data taken from Annual Report 2020 (p. 31)
.1 = t(array(c(
table396,106,126,100,96,
15,39,32,57,50), dim = c(5,2)))
1.totals = array(colSums(table3.1), dim = c(1,5))
table3.colnames(table3.1) = 2016:2020
colnames(table3.1.totals) = 2016:2020
rownames(table3.1) = c("DBD", "DCD")
rownames(table3.1.totals) = c("Total")
.1 = melt(table3.1, varnames = c("Gruppe", "Jahr"), value.name = "Anzahl")
data31.totals = melt(table3.1.totals, varnames = c("Gruppe", "Jahr"),
data3.value.name = "Anzahl")
= -0.8
number_height = 0.5
bar_with
ggplot(data3.1, aes(x=Jahr, y=Anzahl, fill=Gruppe, group=Gruppe)) +
# plot bars with numbers
geom_bar(data = data3.1, stat="identity", position="dodge", width=bar_with) +
geom_text(data = data3.1, aes(label=Anzahl), vjust=number_height,
position = position_dodge(width=bar_with)) +
# plot line with numbers
geom_line(data = data3.1.totals, col = col$strongred.akzent, linewidth=1) +
geom_text(data = data3.1.totals, aes(label=Anzahl), vjust=number_height,
position = position_dodge(width=bar_with), col=col$strongred.akzent) +
# some adjustments (colors, axes, etc)
scale_fill_manual(values=c(col$yellow.donation,
$blue.swt,
col$strongred.akzent)) +
colscale_y_continuous(breaks = seq(0,180,20), limits = c(0,180)) +
ylab("Personen") +
swt_style(grey_theme = TRUE)
Several helper functions are implemented to support statistical computing.
mean_sd
for the mean and standard deviationmedian_irq
for the median and interquartile rangefreq_perc
for the frequency count and percentmiss_perc
for the frequency count and percent for missing data (NA
)tidy_pvalues
for formatting p-valuesThe helper functions above are applied to single variables (vectors).
tidy_missing
displays missing data of all the variables in a data frame.set.seed(1980)
= data.frame(age = rnorm(n = 200, mean = 50, sd = 10))
data $hypertension = rbinom(n = 200, size = 1, prob = 0.20)
data
= data.frame(age = mean_sd(data$age),
tab hypertension = freq_perc(data$hypertension)
)= t(tab)
tab colnames(tab) = "Descriptives"
= as.data.frame(tab)
tab rownames(tab) = c("age, mean (SD)", "hypertension, count (%)")
tab
Descriptives | |
---|---|
age, mean (SD) | 50.2 (10.7) |
hypertension, count (%) | 57 (28.5) |
$age[1:5] = NA
datatidy_missing(data)
Missing | |
---|---|
age | 5 (2.5%) |
hypertension | 0 (0.0%) |
Dates are sometimes entered inconsistently. If we force the data type to be a date using the option col_types
, these values are discarded and show up as NA
.
We also get a warning; see below.
= as.data.frame(read_xlsx(path = "../data/dates.xlsx", col_types = "date")) data
Warning: Expecting date in A4 / R4C1: got '11.03.1980 11:11:11'
data
mydate |
---|
2021-08-22 16:43:01 |
2023-02-17 22:12:02 |
NA |
An alternative is to import them as data type text. The dates show up as numbers and, if not recognized, as characters. The numbers are the number of days since 1899-12-30; this is an Excel convention.
= as.data.frame(read_xlsx(path = "../data/dates.xlsx", col_types = "text"))
data data
mydate |
---|
44430.696539351855 |
44974.925023148149 |
11.03.1980 11:11:11 |
We can use the function num2date()
to convert numbers into dates. There is also a built-in filter that recognizes the alternative format, which can also be modified via the option; see ?num2date
.
$mydate = num2date(data$mydate, format = "%d.%m.%Y %H:%M:%OS", round = FALSE)
data data
mydate |
---|
2021-08-22 16:43:01 |
2023-02-17 22:12:02 |
1980-03-11 11:11:10 |
One disadvantage of handling dates as numbers is when performing data cleaning. For example, manually correcting dates or adding missing data may be necessary. This has to be done in numbers before the conversion into the POSIXct
data type. However, the function date2num()
can be used for this purpose.
= as.data.frame(read_xlsx(path = "../data/dates.xlsx", col_types = "text"))
data $mydate[3] = date2num("1980-03-11 02:10:00")
data$mydate = num2date(data$mydate)
data data
mydate |
---|
2021-08-22 16:43:01 |
2023-02-17 22:12:02 |
1980-03-11 02:10:00 |
Using num2date()
for all date variables will also handle the time zone consistently using CET
time (and CEST
during summer). Note that having mixed time zones UTC
, and CET
can cause offsets of 1 hour.
sessionInfo()
R version 4.4.1 (2024-06-14 ucrt)
Platform: x86_64-w64-mingw32/x64
Running under: Windows 10 x64 (build 19045)
Matrix products: default
locale:
[1] LC_COLLATE=English_Switzerland.utf8 LC_CTYPE=English_Switzerland.utf8
[3] LC_MONETARY=English_Switzerland.utf8 LC_NUMERIC=C
[5] LC_TIME=English_Switzerland.utf8
time zone: Europe/Zurich
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] readxl_1.4.3 swt_0.2 reshape2_1.4.4 gridExtra_2.3 ggplot2_3.5.1
loaded via a namespace (and not attached):
[1] gtable_0.3.5 jsonlite_1.8.8 dplyr_1.1.4 compiler_4.4.1
[5] tidyselect_1.2.1 Rcpp_1.0.13 stringr_1.5.1 splines_4.4.1
[9] scales_1.3.0 yaml_2.3.10 fastmap_1.2.0 lattice_0.22-6
[13] R6_2.5.1 plyr_1.8.9 labeling_0.4.3 generics_0.1.3
[17] knitr_1.48 MASS_7.3-61 htmlwidgets_1.6.4 tibble_3.2.1
[21] munsell_0.5.1 lubridate_1.9.3 pillar_1.9.0 rlang_1.1.4
[25] utf8_1.2.4 stringi_1.8.4 xfun_0.47 segmented_2.1-2
[29] timechange_0.3.0 cli_3.6.3 withr_3.0.1 magrittr_2.0.3
[33] digest_0.6.37 grid_4.4.1 nlme_3.1-166 lifecycle_1.0.4
[37] vctrs_0.6.5 evaluate_0.24.0 glue_1.7.0 farver_2.1.2
[41] cellranger_1.1.0 fansi_1.0.6 colorspace_2.1-1 rmarkdown_2.28
[45] tools_4.4.1 pkgconfig_2.0.3 htmltools_0.5.8.1