From e2e6630f73e0ad5c05c89336440795f6f0b77f78 Mon Sep 17 00:00:00 2001 From: Stu Field Date: Wed, 5 Apr 2023 13:19:59 -0600 Subject: [PATCH] Add new proper vignettes (#35) - moved statistical examples into their own vignettes - they will now be rendered by `pkgdown` - simplifies `README` - four new vignettes: - loading-and-wrangling - binary-classification - linear-regression - two-group-comparison - fixes #35 --- DESCRIPTION | 1 + _pkgdown.yml | 29 +++++ vignettes/.gitignore | 2 + vignettes/SomaDataIO.Rmd | 26 ++-- vignettes/binary-classification.Rmd | 148 +++++++++++++++++++++++ vignettes/figures/.gitignore | 2 + vignettes/linear-regression.Rmd | 171 +++++++++++++++++++++++++++ vignettes/loading-and-wrangling.Rmd | 176 ++++++++++++++++++++++++++++ vignettes/two-group-comparison.Rmd | 155 ++++++++++++++++++++++++ 9 files changed, 703 insertions(+), 7 deletions(-) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/binary-classification.Rmd create mode 100644 vignettes/figures/.gitignore create mode 100644 vignettes/linear-regression.Rmd create mode 100644 vignettes/loading-and-wrangling.Rmd create mode 100644 vignettes/two-group-comparison.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index 8c4df99..70fbe1d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,6 +37,7 @@ Suggests: Biobase, ggplot2, knitr, + purrr, recipes, rmarkdown, spelling, diff --git a/_pkgdown.yml b/_pkgdown.yml index 312a1c4..7594e09 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -27,6 +27,35 @@ articles: contents: - SomaDataIO + - title: Loading and Wrangling + navbar: ~ + desc: > + How to load and manipulate a 'SomaScan' flat text file into + an R environment. + contents: + - loading-and-wrangling + + - title: Two-group Comparison + navbar: ~ + desc: > + Typical two-group comparison of 'SomaScan' data. + contents: + - two-group-comparison + + - title: Binary Classification + navbar: ~ + desc: > + Typical binary classification of 'SomaScan' data.
+ contents: + - binary-classification + + - title: Linear Regression + navbar: ~ + desc: > + Typical linear regression of continuous 'SomaScan' data. + contents: + - linear-regression + reference: - title: Load an ADAT desc: > diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/SomaDataIO.Rmd b/vignettes/SomaDataIO.Rmd index 42fd711..9a35ae7 100644 --- a/vignettes/SomaDataIO.Rmd +++ b/vignettes/SomaDataIO.Rmd @@ -1,6 +1,8 @@ --- title: "Introduction to SomaDataIO" -output: rmarkdown::html_vignette +output: + rmarkdown::html_vignette: + fig_caption: yes vignette: > %\VignetteIndexEntry{Introduction to SomaDataIO} %\VignetteEngine{knitr::rmarkdown} @@ -8,8 +10,8 @@ vignette: > --- ```{r setup, include = FALSE} -library(SomaDataIO) knitr::opts_chunk$set( + echo = TRUE, collapse = TRUE, comment = "#>" ) @@ -20,7 +22,8 @@ knitr::opts_chunk$set( This document accompanies the `SomaDataIO` R package, which loads and exports 'SomaScan' data via the SomaLogic Operating Co., Inc. proprietary text file called an ADAT (`*.adat`). -For file format see [here](https://github.com/SomaLogic/SomaLogic-Data/blob/master/README.md). +For file format see +[here](https://github.com/SomaLogic/SomaLogic-Data/blob/master/README.md). The package also exports auxiliary functions for manipulating, wrangling, and extracting relevant information from an ADAT object once in memory. Basic familiarity with the R environment is assumed, as is the ability to install @@ -42,6 +45,7 @@ contributed packages from the Comprehensive R Archive Network (CRAN). + `?SeqId` analyte (feature) matching. + `dplyr` and `tidyr` verb S3 methods for the `soma_adat` class. + `?rownames` helpers that do not break `soma_adat` attributes. 
+ + please see vignette `vignette("loading-and-wrangling", package = "SomaDataIO")` * Exporting data (Output) + write out a `soma_adat` object as a `*.adat` text file. @@ -49,12 +53,20 @@ contributed packages from the Comprehensive R Archive Network (CRAN). ---------------------- -## Workflows and Analysis (TODO) +## Workflows and Analysis This section will become more fleshed out in future versions of -`SomaDataIO` +`SomaDataIO`. In the meantime, below are 3 examples of typical +primary statistical analyses that are commonly performed on +'SomaScan' data: -#### In the meantime please see the package -[README](https://github.com/SomaLogic/SomaDataIO/blob/main/README.md) +- Two-group comparison (e.g. differential expression) via *t*-test + + see vignette `vignette("two-group-comparison", package = "SomaDataIO")` + +- Binary classification + + see vignette `vignette("binary-classification", package = "SomaDataIO")` + +- Linear regression + + see vignette `vignette("linear-regression", package = "SomaDataIO")` --------------------- diff --git a/vignettes/binary-classification.Rmd b/vignettes/binary-classification.Rmd new file mode 100644 index 0000000..f4ad302 --- /dev/null +++ b/vignettes/binary-classification.Rmd @@ -0,0 +1,148 @@ +--- +title: "Binary Classification" +author: "Stu Field, SomaLogic Operating Co., Inc." +output: + rmarkdown::html_vignette: + fig_caption: yes +vignette: > + %\VignetteIndexEntry{Binary Classification} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +library(SomaDataIO) +library(dplyr) +library(tidyr) +library(purrr) +knitr::opts_chunk$set( + echo = TRUE, + collapse = TRUE, + comment = "#>", + fig.path = "figures/classify-" +) +``` + + +---------------- + + +## Classification via Logistic Regression + + +Although targeted statistical analyses are beyond the scope of +the `SomaDataIO` package, below is an example analysis +that typical users/customers would perform on 'SomaScan' data. 
+ +It is not intended to be a definitive guide in statistical +analysis and existing packages do exist in the `R` ecosystem that perform +parts or extensions of these techniques. Many variations of the workflow +below exist, however the framework highlights how one could perform standard +_preliminary_ analyses on 'SomaScan' data. + + +## Data Preparation +```{r data-prep} +# the `example_data` package data +dim(example_data) + +table(example_data$SampleType) + +# center/scale +cs <- function(.x) { # .x = numeric vector + out <- .x - mean(.x) # center + out / sd(out) # scale +} + +# prepare data set for analysis +cleanData <- example_data |> + filter(SampleType == "Sample") |> # rm control samples + drop_na(Sex) |> # rm NAs if present + log10() |> # log10-transform (Math Generic) + mutate(Group = as.numeric(factor(Sex)) - 1) |> # map Sex -> 0/1 + modify_at(getAnalytes(example_data), cs) + +table(cleanData$Sex) + +table(cleanData$Group) # F = 0; M = 1 +``` + +## Set up Train/Test Data + +```{r train-test} +# idx = training rows; the remaining 50 rows are the hold-out (test) set +# seed resulting in 50/50 class balance +idx <- withr::with_seed(3, sample(1:nrow(cleanData), size = nrow(cleanData) - 50)) +train <- cleanData[idx, ] +test <- cleanData[-idx, ] + +# assert no overlap +isTRUE( + all.equal(intersect(rownames(train), rownames(test)), character(0)) +) +``` + + +## Logistic Regression +We use the `cleanData`, `train`, and `test` data objects from above.
+ +### Predict Sex +```{r logreg-tbl} +LR_tbl <- getAnalyteInfo(train) |> + select(AptName, SeqId, Target = TargetFullName, EntrezGeneSymbol, UniProt) |> + mutate( + formula = map(AptName, ~ as.formula(paste("Group ~", .x))), # create formula + model = map(formula, ~ stats::glm(.x, data = train, family = "binomial", model = FALSE)), # fit glm() + beta_hat = map_dbl(model, ~ coef(.x)[2L]), # pull out coef Beta + p.value = map2_dbl(model, AptName, ~ { + summary(.x)$coefficients[.y, "Pr(>|z|)"] }), # pull out p-values + fdr = p.adjust(p.value, method = "BH") # FDR correction multiple testing + ) |> + arrange(p.value) |> # re-order by `p-value` + mutate(rank = row_number()) # add numeric ranks + +LR_tbl +``` + + +### Fit Model | Calculate Performance + +Next, select features for the model fit. We have a good idea of reasonable `Sex` +markers from prior knowledge (`CGA*`), and fortunately many of these are highly +ranked in `LR_tbl`. Below we fit a 4-marker logistic regression model from +cherry-picked gender-related features: + +```{r fit-logreg} +# AptName is index key between `LR_tbl` and `train` +feats <- LR_tbl$AptName[c(1L, 3L, 5L, 7L)] +form <- as.formula(paste("Group ~", paste(feats, collapse = "+"))) +fit <- glm(form, data = train, family = "binomial", model = FALSE) +pred <- tibble( + true_class = test$Sex, # orig class label + pred = predict(fit, newdata = test, type = "response"), # prob. 
'Male' + pred_class = ifelse(pred < 0.5, "F", "M"), # class label +) +conf <- table(pred$true_class, pred$pred_class, dnn = list("Actual", "Predicted")) +tp <- conf[2L, 2L] +tn <- conf[1L, 1L] +fp <- conf[1L, 2L] +fn <- conf[2L, 1L] + +# Confusion matrix +conf + +# Classification metrics +tibble(Sensitivity = tp / (tp + fn), + Specificity = tn / (tn + fp), + Accuracy = (tp + tn) / sum(conf), + PPV = tp / (tp + fp), + NPV = tn / (tn + fn) +) +``` + + +--------------------- + + +Created by [Rmarkdown](https://github.com/rstudio/rmarkdown) +(v`r utils::packageVersion("rmarkdown")`) and `r R.version$version.string`. diff --git a/vignettes/figures/.gitignore b/vignettes/figures/.gitignore new file mode 100644 index 0000000..506eee3 --- /dev/null +++ b/vignettes/figures/.gitignore @@ -0,0 +1,2 @@ +*.png +*.html diff --git a/vignettes/linear-regression.Rmd b/vignettes/linear-regression.Rmd new file mode 100644 index 0000000..dd38450 --- /dev/null +++ b/vignettes/linear-regression.Rmd @@ -0,0 +1,171 @@ +--- +title: "Linear Regression" +author: "Stu Field, SomaLogic Operating Co., Inc." +output: + rmarkdown::html_vignette: + fig_caption: yes +vignette: > + %\VignetteIndexEntry{Linear Regression} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +library(SomaDataIO) +library(ggplot2) +library(dplyr) +library(tidyr) +library(purrr) +knitr::opts_chunk$set( + echo = TRUE, + collapse = TRUE, + comment = "#>", + fig.path = "figures/linear-reg-" +) +``` + + +-------------- + + +## Regression of Continuous Variables + +Although targeted statistical analyses are beyond the scope of +the `SomaDataIO` package, below is an example analysis +that typical users/customers would perform on 'SomaScan' data. + +It is not intended to be a definitive guide in statistical +analysis and existing packages do exist in the `R` ecosystem that perform +parts or extensions of these techniques. 
Many variations of the workflow +below exist, however the framework highlights how one could perform standard +_preliminary_ analyses on 'SomaScan' data. + + +## Data Preparation +```{r data-prep} +# the `example_data` package data +dim(example_data) + +table(example_data$SampleType) + +# center/scale +cs <- function(.x) { # .x = numeric vector + out <- .x - mean(.x) # center + out / sd(out) # scale +} + +# prepare data set for analysis +cleanData <- example_data |> + filter(SampleType == "Sample") |> # rm control samples + drop_na(Sex) |> # rm NAs if present + log10() |> # log10-transform (Math Generic) + mutate(Group = as.numeric(factor(Sex)) - 1) |> # map Sex -> 0/1 + modify_at(getAnalytes(example_data), cs) + +table(cleanData$Sex) + +table(cleanData$Group) # F = 0; M = 1 +``` + + +## Set up Train/Test Data + +```{r train-test} +# idx = training rows; the remaining 50 rows are the hold-out (test) set +# seed resulting in 50/50 class balance +idx <- withr::with_seed(3, sample(1:nrow(cleanData), size = nrow(cleanData) - 50)) +train <- cleanData[idx, ] +test <- cleanData[-idx, ] + +# assert no overlap +isTRUE( + all.equal(intersect(rownames(train), rownames(test)), character(0)) +) +``` + + +## Linear Regression +We use the `cleanData`, `train`, and `test` data objects from above.
+ +### Predict Age +```{r linreg-tbl} +LinR_tbl <- getAnalyteInfo(train) |> # `train` from above + select(AptName, SeqId, Target = TargetFullName, EntrezGeneSymbol, UniProt) |> + mutate( + formula = map(AptName, ~ as.formula(paste("Age ~", .x, collapse = " + "))), + model = map(formula, ~ lm(.x, data = train, model = FALSE)), # fit linear models + slope = map_dbl(model, ~ coef(.x)[2L]), # pull out B_1 + p.value = map2_dbl(model, AptName, ~ { + summary(.x)$coefficients[.y, "Pr(>|t|)"] }), # pull out p-values + fdr = p.adjust(p.value, method = "BH") # FDR for multiple testing + ) |> + arrange(p.value) |> # re-order by `p-value` + mutate(rank = row_number()) # add numeric ranks + +LinR_tbl +``` + + +### Fit Model | Calculate Performance +Fit an 8-marker model with the top 8 features from `LinR_tbl`: + +```{r linreg-fit} +feats <- head(LinR_tbl$AptName, 8L) +form <- as.formula(paste("Age ~", paste(feats, collapse = "+"))) +fit <- lm(form, data = train, model = FALSE) +n <- nrow(test) +p <- length(feats) + +# Results +res <- tibble( + true_age = test$Age, + pred_age = predict(fit, newdata = test), + pred_error = pred_age - true_age +) + +# Lin's Concordance Correl. Coef. +# Accounts for location + scale shifts +linCCC <- function(x, y) { + stopifnot(length(x) == length(y)) + a <- 2 * cor(x, y) * sd(x) * sd(y) + b <- var(x) + var(y) + (mean(x) - mean(y))^2 + a / b +} + +# Regression metrics +tibble( + rss = sum(res$pred_error^2), # residual sum of squares + tss = sum((test$Age - mean(test$Age))^2), # total sum of squares + rsq = 1 - (rss / tss), # R-squared + rsqadj = max(0, 1 - (1 - rsq) * (n - 1) / (n - p - 1)), # Adjusted R-squared + R2 = stats::cor(res$true_age, res$pred_age)^2, # R-squared Pearson approx. 
+ MAE = mean(abs(res$pred_error)), # Mean Absolute Error + RMSE = sqrt(mean(res$pred_error^2)), # Root Mean Squared Error + CCC = linCCC(res$true_age, res$pred_age) # Lin's CCC +) +``` + + +### Visualize Concordance +```{r linreg-plot, fig.width = 7, fig.height = 7, fig.align = "center"} +lims <- range(res$true_age, res$pred_age) +res |> + ggplot(aes(x = true_age, y = pred_age)) + + geom_point(colour = "#24135F", alpha = 0.5, size = 4) + + expand_limits(x = lims, y = lims) + # make square + geom_abline(slope = 1, colour = "black") + # add unit line + geom_rug(colour = "#286d9b", linewidth = 0.2) + + labs(y = "Predicted Age", x = "Actual Age") + + ggtitle("Concordance in Predicted vs. Actual Age") + + theme(plot.title = element_text(size = 21, face = "bold"), + axis.title.x = element_text(size = 14), + axis.title.y = element_text(size = 14)) +``` + + + +--------------------- + + +Created by [Rmarkdown](https://github.com/rstudio/rmarkdown) +(v`r utils::packageVersion("rmarkdown")`) and `r R.version$version.string`. diff --git a/vignettes/loading-and-wrangling.Rmd b/vignettes/loading-and-wrangling.Rmd new file mode 100644 index 0000000..732ebd7 --- /dev/null +++ b/vignettes/loading-and-wrangling.Rmd @@ -0,0 +1,176 @@ +--- +title: "Loading and Wrangling 'SomaScan'" +author: "Stu Field, SomaLogic Operating Co., Inc." 
+output: + rmarkdown::html_vignette: + fig_caption: yes +vignette: > + %\VignetteIndexEntry{Loading and Wrangling 'SomaScan'} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, echo = FALSE, results = FALSE, message = FALSE} +options(width = 80) +#Sys.setlocale("LC_COLLATE", "C") +Sys.setlocale("LC_COLLATE", "en_US.UTF-8") # ensure common sorting envir +library(SomaDataIO) +knitr::opts_chunk$set( + echo = TRUE, + collapse = TRUE, + comment = "#>", + fig.path = "figures/wrangling-" +) +``` + + +## Loading an ADAT + +```{r read-adat} +# Sample file name +f <- system.file("extdata", "example_data10.adat", + package = "SomaDataIO", mustWork = TRUE) +my_adat <- read_adat(f) +is.soma_adat(my_adat) + +# S3 print method forwards -> tibble +my_adat + +print(my_adat, show_header = TRUE) # if simply wish to see Header info + +# S3 summary method +# View Target and summary statistics +seqs <- tail(names(my_adat), 3L) +summary(my_adat[, seqs]) + +# Summarize by Sex +my_adat[, seqs] |> + split(my_adat$Sex) |> + lapply(summary) +``` + + +--------------------- + + +## Wrangling +### Attributes Contain File and Feature Information +```{r atts} +names(attributes(my_adat)) + +# The `Col.Meta` attribute contains +# target annotation information +attr(my_adat, "Col.Meta") +``` + +### Analyte Features (`seq.xxxx.xx`) +```{r feats} +getAnalytes(my_adat) |> head(20L) # first 20 analytes; see AptName above +getAnalytes(my_adat) |> length() # how many analytes +getAnalytes(my_adat, n = TRUE) # the `n` argument; no. 
analytes +``` + +### Feature Data +The `getAnalyteInfo()` function creates a lookup table that links +analyte feature names in the `soma_adat` object to the annotation +data in `?Col.Meta` via the common index-key, `AptName`, in column 1: + +```{r annotations} +getAnalyteInfo(my_adat) +``` + + +### Clinical Data +```{r meta} +getMeta(my_adat) # clinical meta data for each sample +getMeta(my_adat, n = TRUE) # also an `n` argument +``` + + +### ADAT structure + +The `soma_adat` object also contains specific structures that are useful +to users. Please also see `?colmeta` or `?annotations` for further +details about these fields. + +--------------------- + + + +### Group Generics +You may perform basic mathematical transformations on the feature data _only_ +with special `soma_adat` S3 methods (see `?groupGenerics`): + +```{r group-generics} +head(my_adat$seq.2429.27) + +logData <- log10(my_adat) # a typical log10() transform +head(logData$seq.2429.27) + +roundData <- round(my_adat) +head(roundData$seq.2429.27) + +sqData <- sqrt(my_adat) +head(sqData$seq.2429.27) + +antilog(1:4) + +sum(my_adat < 100) # low signalling values + +all.equal(my_adat, sqrt(my_adat^2)) + +all.equal(my_adat, antilog(log10(my_adat))) +``` + + +#### Math Generics + +```{r math} +getGroupMembers("Math") + +getGroupMembers("Compare") + +getGroupMembers("Arith") + +getGroupMembers("Summary") +``` + + +### Full Complement of [dplyr](https://dplyr.tidyverse.org) S3 Methods + +The `soma_adat` also comes with numerous class-specific methods for the most +popular [dplyr](https://dplyr.tidyverse.org) generics that make working +with `soma_adat` objects simpler for those familiar with this standard toolkit: + +```{r dplyr} +dim(my_adat) +males <- dplyr::filter(my_adat, Sex == "M") +dim(males) + +males |> + dplyr::select(SampleType, SampleMatrix, starts_with("NormScale")) +``` + + +### Available S3 Methods `soma_adat` +```{r methods} +# see full complement of `soma_adat` methods +methods(class =
"soma_adat") +``` + + +--------------------- + +## Writing a `soma_adat` +```{r write} +is_intact_attr(my_adat) # MUST have intact attrs + +write_adat(my_adat, file = tempfile("my-adat-", fileext = ".adat")) +``` + + +--------------------- + + +Created by [Rmarkdown](https://github.com/rstudio/rmarkdown) +(v`r utils::packageVersion("rmarkdown")`) and `r R.version$version.string`. diff --git a/vignettes/two-group-comparison.Rmd b/vignettes/two-group-comparison.Rmd new file mode 100644 index 0000000..f443aed --- /dev/null +++ b/vignettes/two-group-comparison.Rmd @@ -0,0 +1,155 @@ +--- +title: "Two-Group Comparison" +author: "Stu Field, SomaLogic Operating Co., Inc." +output: + rmarkdown::html_vignette: + fig_caption: yes +vignette: > + %\VignetteIndexEntry{Two-Group Comparison} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +library(SomaDataIO) +library(ggplot2) +library(dplyr) +library(tidyr) +library(purrr) +knitr::opts_chunk$set( + echo = TRUE, + collapse = TRUE, + comment = "#>", + fig.path = "figures/two-group-" +) +``` + + +-------------- + + +## Differential Expression via *t*-test + +Although targeted statistical analyses are beyond the scope of +the `SomaDataIO` package, below is an example analysis +that typical users/customers would perform on 'SomaScan' data. + +It is not intended to be a definitive guide in statistical +analysis and existing packages do exist in the `R` ecosystem that perform +parts or extensions of these techniques. Many variations of the workflow +below exist, however the framework highlights how one could perform standard +_preliminary_ analyses on 'SomaScan' data. 
+ + +## Data Preparation +```{r data-prep} +# the `example_data` package data +dim(example_data) + +table(example_data$SampleType) + +# center/scale +cs <- function(.x) { # .x = numeric vector + out <- .x - mean(.x) # center + out / sd(out) # scale +} + +# prepare data set for analysis +cleanData <- example_data |> + filter(SampleType == "Sample") |> # rm control samples + drop_na(Sex) |> # rm NAs if present + log10() |> # log10-transform (Math Generic) + mutate(Group = as.numeric(factor(Sex)) - 1) |> # map Sex -> 0/1 + modify_at(getAnalytes(example_data), cs) + +table(cleanData$Sex) + +table(cleanData$Group) # F = 0; M = 1 +``` + + + +## Compare Two Groups (`M`/`F`) +### Get annotations via `getAnalyteInfo()`: + +```{r get-anno} +t_tests <- getAnalyteInfo(cleanData) |> + select(AptName, SeqId, Target = TargetFullName, EntrezGeneSymbol, UniProt) + +# Feature data info: +# Subset via dplyr::filter(t_tests, ...) here to +# restrict analysis to only certain analytes +t_tests +``` + + + +### Calculate `t-tests` +Use a "list columns" approach via nested tibble object +using `dplyr`, `purrr`, and `stats::t.test()` + +```{r t-tests} +t_tests <- t_tests |> + mutate( + formula = map(AptName, ~ as.formula(paste(.x, "~ Sex"))), # create formula + t_test = map(formula, ~ stats::t.test(.x, data = cleanData)), # fit t-tests + t_stat = map_dbl(t_test, "statistic"), # pull out t-statistic + p.value = map_dbl(t_test, "p.value"), # pull out p-values + fdr = p.adjust(p.value, method = "BH") # FDR for multiple testing + ) |> + arrange(p.value) |> # re-order by `p-value` + mutate(rank = row_number()) # add numeric ranks + +# View analysis tibble +t_tests +``` + + + +### Visualize with `ggplot2()` +Create a plotting tibble in the "long" format for `ggplot2`: + +```{r ggplot-data} +target_map <- head(t_tests, 12L) |> # mapping table + select(AptName, Target) # SeqId -> Target + +plot_tbl <- example_data |> + filter(SampleType == "Sample") |> # rm control samples + drop_na(Sex) |> # rm 
NAs if present + log10() |> # log10-transform for plotting + select(Sex, target_map$AptName) |> # top 12 analytes + pivot_longer(cols = -Sex, names_to = "AptName", values_to = "RFU") |> + dplyr::left_join(target_map, by = "AptName") |> + # order factor levels by 't_tests' rank to order plots below + mutate(Target = factor(Target, levels = target_map$Target)) + +plot_tbl +``` + +```{r seed, include = FALSE} +# seed for geom_jitter() so the figure is reproducible and Git isn't triggered every time +set.seed(1) +``` + +```{r ggplot-boxes, fig.width = 7, fig.height = 7, fig.align = "center"} +plot_tbl |> + ggplot(aes(x = Sex, y = RFU, fill = Sex)) + + geom_boxplot(alpha = 0.5, outlier.shape = NA) + + scale_fill_manual(values = c("#24135F", "#00A499")) + + geom_jitter(shape = 16, width = 0.1, alpha = 0.5) + + facet_wrap(~ Target) + + ggtitle("Boxplots of Top Analytes by t-test") + + labs(y = "log10(RFU)") + + theme(plot.title = element_text(size = 21, face = "bold"), + axis.title.x = element_text(size = 14), + axis.title.y = element_text(size = 14), + legend.position = "top" + ) +``` + + +--------------------- + + +Created by [Rmarkdown](https://github.com/rstudio/rmarkdown) +(v`r utils::packageVersion("rmarkdown")`) and `r R.version$version.string`.