We need the following packages:
required_packages <- c("readxl",
"purrr",
"dplyr",
"stringr",
"tidyr",
"AMR",
"RColorBrewer",
"magrittr")
Installing those that are not already installed on the system:
for (package in required_packages) {
if (!package %in% installed.packages()) install.packages(package)
}
Loading the required packages:
devnull <- lapply(required_packages, require, character.only = TRUE)
Date and type of antimicrobial use per farm:
amu <- read_excel("AMU_KietStudy_Marc.xlsx")
It is a data frame that looks like this:
amu
## # A tibble: 90 × 5
## FarmID AgeUse_Day DurationOfUse_days ProductNo AAI
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 K06 30 4 1 sulfadimidine
## 2 K06 30 4 1 sulfaquinoxaline
## 3 K06 35 3 2 doxycyline
## 4 K06 35 3 2 tylosin
## 5 K07 1 4 1 oxytetracyline
## 6 K07 1 4 1 colistin
## 7 K07 36 1 2 ampicillin
## 8 K07 36 1 2 colistin
## 9 K07 38 3 3 flumequine
## 10 K07 38 3 4 enrofloxacin
## # … with 80 more rows
Antimicrobial class of the antimicrobial(s) against which each of the resistance genes is active:
classes <- read_excel("FarmvsClass.xlsx")
It’s a data frame that looks like this:
classes
## # A tibble: 81 × 2
## EvaGreen_Name Antimicrobial_Class
## <chr> <chr>
## 1 aac3-liacde Aminoglycoside
## 2 aac6-aph2 Aminoglycoside
## 3 aac6-lb Aminoglycoside
## 4 aac6-li Aminoglycoside
## 5 aac6-lla Aminoglycoside
## 6 aadA Aminoglycoside
## 7 aadE Aminoglycoside
## 8 aadE-like Aminoglycoside
## 9 acrA Multidrug
## 10 acrB Multidrug
## # … with 71 more rows
Note that each resistance gene can be active against antimicrobials of more than one antimicrobial class.
The names of the farms:
(farms <- paste0("K", c(formatC(6:9, 1, flag = "0"), 11:26)))
## [1] "K06" "K07" "K08" "K09" "K11" "K12" "K13" "K14" "K15" "K16" "K17" "K18"
## [13] "K19" "K20" "K21" "K22" "K23" "K24" "K25" "K26"
Let’s start by looking at the resistance genes concentrations in chicken only (maybe we’ll look at the rat data later on):
genes <- farms %>%
map(read_excel, path = "KietAnalysisData.xlsx") %>%
map(filter, Group.1 %in% c("Chicken-Control", "Chicken-Treatment")) %>%
map(select, -Group.1, -TotalLog2Value) %>%
setNames(farms)
In the excel file, the gene concentrations for a given farm (with
control and treatment experiments) are in a different tab. This
structure is mapped in the genes
object that is a list of
data frames, each of them containing the genes concentrations for a
given farm. As an example the data for the first farm look like
this:
genes[[1]]
## # A tibble: 10 × 84
## FarmID Group Sample_Name SamplingDay `aac3-liacde` `aac6-aph2` `aac6-lb`
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 K06 Control K06-BC Before 4.74 7.42 5.75
## 2 K06 Control K06-07C 7 1.96 5.55 2.21
## 3 K06 Control K06-14C 14 0.205 4.55 0.138
## 4 K06 Control K06-60C 60 1.46 6.01 1.69
## 5 K06 Control K06-EC End 0.856 5.57 1.08
## 6 K06 Treatment K06-B Before 3.65 6.20 5.18
## 7 K06 Treatment K06-07 7 4.00 5.85 3.93
## 8 K06 Treatment K06-14 14 1.96 5.01 0.803
## 9 K06 Treatment K06-60 60 5.36 3.07 5.93
## 10 K06 Treatment K06-E End 5.72 5.49 4.44
## # … with 77 more variables: `aac6-li` <dbl>, `aac6-lla` <dbl>, aadA <dbl>,
## # aadE <dbl>, `aadE-like` <dbl>, acrA <dbl>, acrB <dbl>, acrF <dbl>,
## # `aph2-lb` <dbl>, `aph2-lde` <dbl>, `aph3-lac` <dbl>, `aph3-lll` <dbl>,
## # arnA <dbl>, bacA_1 <dbl>, bacA_2 <dbl>, blaAMPC <dbl>, blaCMY2 <dbl>,
## # blaCTXM <dbl>, blaDHA <dbl>, blaPSE <dbl>, blaSHV <dbl>, blaTEM <dbl>,
## # blaZ <dbl>, cat <dbl>, cblA <dbl>, cepA <dbl>, cepA2 <dbl>, cfr <dbl>,
## # cfr2 <dbl>, cfxA <dbl>, `cmlA1-01` <dbl>, cmr <dbl>, dfrA <dbl>, …
Let’s check that the names of the columns are the same for all the farms:
genes %>%
map_df(names) %>%
apply(1, unique) %>%
.[map_int(., length) > 1]
## [[1]]
## [1] "FarmID" "K09"
There is a problem with with the FarmID
variable that is
called K09
is one of the farms. Let’s fix this. As the
names of the variables seem OK in the first farm:
(correct_names <- names(genes[[1]]))
## [1] "FarmID" "Group" "Sample_Name" "SamplingDay" "aac3-liacde"
## [6] "aac6-aph2" "aac6-lb" "aac6-li" "aac6-lla" "aadA"
## [11] "aadE" "aadE-like" "acrA" "acrB" "acrF"
## [16] "aph2-lb" "aph2-lde" "aph3-lac" "aph3-lll" "arnA"
## [21] "bacA_1" "bacA_2" "blaAMPC" "blaCMY2" "blaCTXM"
## [26] "blaDHA" "blaPSE" "blaSHV" "blaTEM" "blaZ"
## [31] "cat" "cblA" "cepA" "cepA2" "cfr"
## [36] "cfr2" "cfxA" "cmlA1-01" "cmr" "dfrA"
## [41] "dfrF" "ermA" "ermB" "ermC" "floR"
## [46] "folA" "fosB" "fox5" "macB" "mcr-1"
## [51] "mcr-2" "mcr-3" "mdtF" "mdtL" "mdtO"
## [56] "mecA" "mefa10" "mefA3" "mfsA" "oprD"
## [61] "oqxA" "oqxB" "qacA" "qacC" "qacE"
## [66] "qnrA" "qnrB" "qnrS" "sat4" "spc"
## [71] "strB" "sul1" "sul2" "sul3" "tetB"
## [76] "tetC-01" "tetM" "tetO" "tetQ" "tetW"
## [81] "tolC" "vanA" "vanB" "vatA"
Let’s use them as variables names for all the farms:
genes %<>% map(setNames, correct_names)
Note also that not all farms have a “Before” measurement in the control group:
(tmp <- genes %>%
map(~ .x %>% filter(SamplingDay == "Before") %>% select(Group)) %>%
.[map_int(., nrow) < 2] %>%
unlist())
## K14.Group K15.Group K16.Group K17.Group K18.Group K20.Group
## "Treatment" "Treatment" "Treatment" "Treatment" "Treatment" "Treatment"
## K21.Group K22.Group K23.Group
## "Treatment" "Treatment" "Treatment"
For the farms where the “Before” measurement is missing in the
control group, let’s just use the “Before” measurement of the treatment
group. For that, we need the following function where x
is
a data frame for one farm:
control_before <- function(x) {
y <- filter(x, SamplingDay == "Before")
y$Group <- "Control"
y$Sample_Name <- NA
rbind(x, y)
}
Let’s now use this function on all the farms that need to be fixed:
# the names of the farms that need to be fixed:
farms_to_fix <- tmp %>%
names() %>%
str_remove(".Group")
# the fixed data for these farms:
fixed_farms <- genes %>%
extract(farms_to_fix) %>%
map(control_before)
# updating the original data with the fixed data:
genes[farms_to_fix] <- fixed_farms
Here we compute aggregates of resistance genes, these aggregates being the sums of all the resistance genes but also the sum of all the resistance genes by class of antimicrobial against which they are effective. Indeed, we will perform the analyses in 3 different ways: per resistant gene, for all resistant genes altogether, and per class of antimicrobials against which the resistant genes are active.
Let’s start by retrieving the names of resistance genes:
(resistance_genes <- setdiff(names(genes[[1]]),
c("FarmID", "Group", "Sample_Name", "SamplingDay")))
## [1] "aac3-liacde" "aac6-aph2" "aac6-lb" "aac6-li" "aac6-lla"
## [6] "aadA" "aadE" "aadE-like" "acrA" "acrB"
## [11] "acrF" "aph2-lb" "aph2-lde" "aph3-lac" "aph3-lll"
## [16] "arnA" "bacA_1" "bacA_2" "blaAMPC" "blaCMY2"
## [21] "blaCTXM" "blaDHA" "blaPSE" "blaSHV" "blaTEM"
## [26] "blaZ" "cat" "cblA" "cepA" "cepA2"
## [31] "cfr" "cfr2" "cfxA" "cmlA1-01" "cmr"
## [36] "dfrA" "dfrF" "ermA" "ermB" "ermC"
## [41] "floR" "folA" "fosB" "fox5" "macB"
## [46] "mcr-1" "mcr-2" "mcr-3" "mdtF" "mdtL"
## [51] "mdtO" "mecA" "mefa10" "mefA3" "mfsA"
## [56] "oprD" "oqxA" "oqxB" "qacA" "qacC"
## [61] "qacE" "qnrA" "qnrB" "qnrS" "sat4"
## [66] "spc" "strB" "sul1" "sul2" "sul3"
## [71] "tetB" "tetC-01" "tetM" "tetO" "tetQ"
## [76] "tetW" "tolC" "vanA" "vanB" "vatA"
As a reminder, classes
looks like this:
classes
## # A tibble: 81 × 2
## EvaGreen_Name Antimicrobial_Class
## <chr> <chr>
## 1 aac3-liacde Aminoglycoside
## 2 aac6-aph2 Aminoglycoside
## 3 aac6-lb Aminoglycoside
## 4 aac6-li Aminoglycoside
## 5 aac6-lla Aminoglycoside
## 6 aadA Aminoglycoside
## 7 aadE Aminoglycoside
## 8 aadE-like Aminoglycoside
## 9 acrA Multidrug
## 10 acrB Multidrug
## # … with 71 more rows
Let’s add a category All
to the
Antimicrobial_Class
variable. This category will simply
correspond to all the resistance genes:
classes %<>%
bind_rows(data.frame(EvaGreen_Name = resistance_genes,
Antimicrobial_Class = "All"))
The antimicrobial classes against which each resistance gene is active now looks like:
(classes_names <- unique(classes$Antimicrobial_Class))
## [1] "Aminoglycoside" "Multidrug" "Polymyxin" "Other"
## [5] "Beta-Lactam" "Phenicol" "Sulfonamide" "MLSB"
## [9] "Quinolone" "Tetracycline" "Glycopeptide" "All"
Note that Other
and All
categories are
actually not antimicrobials classes per se. The following function
calculates the sum of the genes concentration for a given class
am_class
for the data frame gene_farm
of a
given farm:
sum_by_class <- function(am_class, gene_farm) {
classes %>%
filter(Antimicrobial_Class == am_class) %>%
pull(EvaGreen_Name) %>%
extract(gene_farm, .) %>%
rowSums()
}
The following function uses the one above and adds as many variables as antimicrobial classes (12) to the data frame of a given farm:
add_sums_by_class <- function(gene_farm) {
gene_farm %>%
map_dfc(classes_names, sum_by_class, .) %>%
setNames(classes_names) %>%
bind_cols(gene_farm, .)
}
Let’s use this function to add the sums of the genes concentrations to the data frames of all the farms:
genes %<>% map(add_sums_by_class)
As a reminder, the data on AMU look like this:
amu
## # A tibble: 90 × 5
## FarmID AgeUse_Day DurationOfUse_days ProductNo AAI
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 K06 30 4 1 sulfadimidine
## 2 K06 30 4 1 sulfaquinoxaline
## 3 K06 35 3 2 doxycyline
## 4 K06 35 3 2 tylosin
## 5 K07 1 4 1 oxytetracyline
## 6 K07 1 4 1 colistin
## 7 K07 36 1 2 ampicillin
## 8 K07 36 1 2 colistin
## 9 K07 38 3 3 flumequine
## 10 K07 38 3 4 enrofloxacin
## # … with 80 more rows
In order to work with AMU by antimicrobial class, we need to generate
the antimicrobial class of each of the antimicrobial of the
AAI
column. For that, we can use the
ab_group()
function of the AMR
package, but,
for consistency with the classes
data frame, we need to
generate a hash table that provide the correspondence between the
spelling of the antimicrobial class given by the
AMR::ab_group()
function and the spelling of the
antimicrobial class used in our classes
data frame:
# AMR package: classes data frame:
hash <- c("Aminoglycosides" = "Aminoglycoside",
"Amphenicols" = "Phenicol",
"Antifungals/antimycotics" = "Antifungals/antimycotics",
"Beta-lactams/penicillins" = "Beta-Lactam",
"Cephalosporins (3rd gen.)" = "Beta-Lactam",
"Macrolides/lincosamides" = "MLSB",
"Other antibacterials" = "Other",
"Polymyxins" = "Polymyxin",
"Quinolones" = "Quinolone",
"Tetracyclines" = "Tetracycline",
"Trimethoprims" = "Sulfonamide")
Let’s generate the correspondence data frame:
(amu_classes <- amu %>%
select(AAI) %>%
unique() %>%
mutate(AMR_package = map_chr(AAI, ab_group),
classes_df = hash[AMR_package]))
## Warning: in `as.ab()`: these values could not be coerced to a valid antimicrobial
## ID: "bromhexine".
## # A tibble: 25 × 3
## AAI AMR_package classes_df
## <chr> <chr> <chr>
## 1 sulfadimidine Trimethoprims Sulfonamide
## 2 sulfaquinoxaline Other antibacterials Other
## 3 doxycyline Tetracyclines Tetracycline
## 4 tylosin Macrolides/lincosamides MLSB
## 5 oxytetracyline Tetracyclines Tetracycline
## 6 colistin Polymyxins Polymyxin
## 7 ampicillin Beta-lactams/penicillins Beta-Lactam
## 8 flumequine Quinolones Quinolone
## 9 enrofloxacin Quinolones Quinolone
## 10 ceftiofur Cephalosporins (3rd gen.) Beta-Lactam
## # … with 15 more rows
Let’s check for missing values (given the warning we got in the above call):
filter(amu_classes, if_any(.fns = is.na))
## # A tibble: 1 × 3
## AAI AMR_package classes_df
## <chr> <chr> <chr>
## 1 bromhexine <NA> <NA>
Let’s fix it manually:
amu_classes[amu_classes$AAI == "bromhexine", "classes_df"] <- "Mucoactive agent"
Finally, we can use amu_classes
to make another hash
table:
hash <- with(amu_classes, setNames(classes_df, AAI))
And we can use this new hash table to add the antimicrobial class to
the amu
data frame:
(amu %<>% mutate(class = hash[AAI]))
## # A tibble: 90 × 6
## FarmID AgeUse_Day DurationOfUse_days ProductNo AAI class
## <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 K06 30 4 1 sulfadimidine Sulfonamide
## 2 K06 30 4 1 sulfaquinoxaline Other
## 3 K06 35 3 2 doxycyline Tetracycline
## 4 K06 35 3 2 tylosin MLSB
## 5 K07 1 4 1 oxytetracyline Tetracycline
## 6 K07 1 4 1 colistin Polymyxin
## 7 K07 36 1 2 ampicillin Beta-Lactam
## 8 K07 36 1 2 colistin Polymyxin
## 9 K07 38 3 3 flumequine Quinolone
## 10 K07 38 3 4 enrofloxacin Quinolone
## # … with 80 more rows
Here, the categories used in the class
variable of the
amu
data frame are consistent with the categories used in
the Antimicrobial_Class
of the classes
data
frame.
Let first define a few utility functions. This function duplicates
the first row of a data frame x
and paste it at the end of
the data frame:
duplicate_first <- function(x) {
bind_rows(x, x[1, ])
}
This function removes the last row of a data frame
x
:
remove_last <- function(x) {
x[-nrow(x), ]
}
This function gets the last value of the y
variable of a
data frame x
:
get_last <- function(x) {
unlist(x[nrow(x), "y"])
}
This function retrieves the sampling collection time from the data
frame x
of a farm
sampling_dates <- function(x) {
x %>%
pull(SamplingDay) %>%
unique() %>%
setdiff(c("Before", "After", "End")) %>%
c(0) %>%
as.integer() %>%
sort()
}
This function adds the sampling collection time points to the figure
of AMU, using the data data
that have been prepared for
this figure:
adding_sample_collection <- function(data) {
f <- function(x) {
if(nrow(x) > 1) {
y <- x %>%
filter(c(AgeUse_Day[-1] > end[-length(end)], NA)) %>%
pull(end) %>%
first()
if(is.na(y)) return(max(x$end))
return(y)
}
pull(x, end)
}
g <- function(y) {
data.frame(FarmID = rep(names(y), map(y, length)), x = unlist(y))
}
a <- map(genes, sampling_dates)
b <- data %>%
group_by(FarmID) %>%
summarise(y1 = min(y) - 1,
y2 = max(y) + 1)
data %>%
group_by(FarmID) %>%
group_split() %>%
map(f) %>%
unlist() %>%
map2(a, ., `+`) %>%
g() %>%
left_join(b, "FarmID") %$%
segments(x, y1, x, y2)
}
Preparing the data for plotting antimicrobial use:
tmp <- amu %>%
filter(!class %in% c("Other", "Mucoactive agent", "Antifungals/antimycotics")) %>%
mutate(end = AgeUse_Day + DurationOfUse_days) %>%
group_by(FarmID) %>%
group_split() %>%
map(arrange, AgeUse_Day, end) %>%
map(duplicate_first) %>%
bind_rows() %>%
mutate(y = row_number()) %>%
group_by(FarmID) %>%
group_split()
last_values <- tmp %>%
map_int(get_last)
tmp %<>%
map(remove_last) %>%
bind_rows()
classes_names <- tmp %>%
pull(class) %>%
unique()
hash <- classes_names %>%
length() %>%
brewer.pal("Set3") %>%
setNames(classes_names)
tmp %<>% mutate(color = hash[class])
Plotting the data:
x_max <- 135
lwd_val <- 2
y_val <- c(.16347245, .86858097)
opar <- par(plt = c(.05, .7, y_val))
plot(NA, xlim = c(-7, x_max), ylim = c(0, (max(tmp$y) + 1)),
type = "n", xlab = "age of flock (days)", ylab = "", axes = FALSE,
xaxs = "i", yaxs = "i")
box(bty = "o")
axis(1)
with(tmp, segments(AgeUse_Day, y, end, y, col = color, lwd = lwd_val))
abline(h = last_values, col = "grey")
abline(v = 0)
abline(v = seq(7, x_max, 7), col = "grey")
tmp %>%
group_by(FarmID) %>%
summarise(y = mean(y)) %$%
text(-3.5, y, FarmID, cex = .5)
adding_sample_collection(tmp)
par(plt = c(.71, 1, y_val), new = TRUE)
plot(1:10, type = "n", axes = FALSE, ann = FALSE)
legend("center", legend = names(hash), col = hash, lwd = lwd_val, lty = 1, bty = "n")
par(opar)
Note that there is missing information for farm K24 here.
The time points:
genes %>%
map(pull, SamplingDay) %>%
unlist() %>%
unique()
## [1] "Before" "7" "14" "60" "End" "After" "30" "90"
Generating new time points and ordering the data chronologically:
genes %<>% map(mutate,
SamplingDay2 = as.integer(recode(SamplingDay,
Before = "-7",
After = "0",
End = "120"))) %>%
map(arrange, Group, SamplingDay2) # making sure that data are arranged chronologically
A utility function for the function
plot_gene_concentration()
that follows after. This function
adds the dots and lines to a plot:
plot_data <- function(x, gene, col, lwd = 2, lty = 3) {
lines2 <- function(...) lines(..., col = col, lwd = lwd)
nrows <- nrow(x)
x2 <- x[-c(1, nrows), ]
x3 <- x[1:2, ]
x4 <- x[(nrows - 1):nrows, ]
points(x$SamplingDay2, x[[gene]], col = col, lwd = lwd)
lines2(x2$SamplingDay2, x2[[gene]])
lines2(x3$SamplingDay2, x3[[gene]], lty = lty)
lines2(x4$SamplingDay2, x4[[gene]], lty = lty)
}
Another utility function for the function
plot_gene_concentration()
that follows after. This function
plot the axes, their range and labels:
plot_frame <- function(..., type = "n") {
plot(type = type, xlim = c(-10, 120), axes = FALSE, ...)
ats <- c(-7, seq(0, 120, 20))
lbs <- ats
lbs[1] <- "before"
lbs[length(lbs)] <- "end"
axis(1, ats, lbs)
axis(2)
}
Let’s now define colors that we will use throughout for the control and treatment experiments:
exp_col <- c(treatment = "red", control = "blue")
The following function uses the two previous functions to plots the
gene concentrations as a function of time for a given resistance gene
gene
in a given farm farm
. And this for both
control and treatment experiments:
plot_gene_concentration <- function(farm, gene, text, col = exp_col) {
farm_dataset <- genes[[farm]]
plot_frame(farm_dataset$SamplingDay2, farm_dataset[[gene]],
xlab = "time after treatment (days)", ylab = "gene concentration")
farm_dataset %>%
filter(Group == "Control") %>%
plot_data(gene, col["control"])
farm_dataset %>%
filter(Group == "Treatment") %>%
plot_data(gene, col["treatment"])
mtext(text)
abline(v = 0, lwd = 2)
}
The text
argument is a character string to use as the
title of the plot. Plotting aac3-liacde
for control and
treatment experiments in farm K06:
plot_gene_concentration(farms[1], resistance_genes[1], resistance_genes[1])
legend("right", legend = names(exp_col), col = exp_col, lty = 1, lwd = 2, bty = "n")
Number of columns we want and some graph tuning:
ncols <- 4
plt_val <- c(.13, .92, .22, .9)
Plotting all the genes for the first farm:
opar <- par(mfrow = c(ceiling(length(resistance_genes) / ncols), ncols), plt = plt_val)
walk(resistance_genes, ~ plot_gene_concentration(farms[1], .x, .x))
par(opar)
Plotting all the farms for the first gene:
opar <- par(mfrow = c(ceiling(length(farms) / ncols), ncols), plt = plt_val)
walk(farms, ~ plot_gene_concentration(.x, resistance_genes[1], .x))
par(opar)
Here we want to plot on a single graph the data from all the farms
(with control and treatment experiments), with one line per experiment
time series. For that, we first need to standardize the genes
concentration in order to ensure that dynamics are comparable across
experiments and farms. The standardization is done by taking the
Before
measurement as a reference value.
The following function allows to standardize the data for all the resistance genes from one given experiment (i.e. either control or treatment in one given farm):
standardize_by_before_ <- function(x) {
rbind(rep(1, length(resistance_genes)),
sweep(as.matrix(x[x$SamplingDay != "Before", resistance_genes]), 2,
unlist(x[x$SamplingDay == "Before", resistance_genes]), `/`))
}
Here x
is the subset of the data frame of genes
concentrations of a given farm that correspond either to the control of
the treatment experiment. This function below uses the one above and
standardizes the data for all the resistance genes for both the control
and treatment experiments of a given farm:
standardize_by_before <- function(x) {
x[, resistance_genes] <- rbind(standardize_by_before_(filter(x, Group == "Control")),
standardize_by_before_(filter(x, Group == "Treatment")))
x
}
Now here x
is the data frame of genes concentrations of
a given farm. Let’s use this function to standardize the gene
concentrations for all the farms:
genes_standardized <- map(genes, standardize_by_before)
Let’s now plot the concentrations of a given gene across the farms on a single plot. Let’s first define the following function that draws the layout the plot:
plot_frame2 <- function(...) {
plot_frame(..., xlab = "time (days)", ylab = "standardized genes concentrations")
}
The same on with a log scale y-axis:
plot_frame2log <- function(...) {
plot_frame(..., xlab = "time (days)", ylab = "log(standardized genes concentrations)")
}
Let’s also define a function that plot vertical and horizontal lines and adds title:
lines_title <- function(x, h = 0) {
mtext(x)
abline(h = h, lwd = 2)
abline(v = 0, lwd = 2)
}
Let’s now explore various ways to plot the data.
Now, we can start exploring various options of plotting the data. Let’s start by considering this transformation function:
transformation <- function(x) {
mutate_at(x, 3, log10)
}
This function replaces infinity values in the third column of data
frame x
by some specified value:
replace_infinity <- function(x, infinity = -1000) {
mutate_at(x, 3, ~ replace(.x, is.infinite(.x), infinity))
}
With the 4 functions above being defined, we can proceed and explore plotting:
plot_gene_all_farms <- function(gene, infinity = -1000, h = 0, pf = plot_frame2) {
tmp <- genes_standardized %>%
map(extract, c("Group", "SamplingDay2", gene)) %>%
map(transformation)
tmp2 <- bind_rows(tmp)
pf(tmp2[[2]], tmp2[[3]])
tmp %>%
map(replace_infinity, infinity) %>%
map(~ split(.x, .x$Group)) %>%
unlist(recursive = FALSE) %>%
map(mutate, col = exp_col[(Group == "Control") + 1]) %>%
sample() %>% # we want to shuffle the treatments for unbiased visualization
walk(~ plot_data(.x, gene, col = .x$col))
lines_title(gene, h)
}
Note here that we transform the standardized genes concentrations.
Depending on the chosen transformation, it may generate infinity values.
For visualization purpose we replace these values by the big number
infinity
in the list of the function’s arguments. Let’s try
it on one resistance gene:
plot_gene_all_farms(resistance_genes[1], pf = plot_frame2log)
legend("bottomright", legend = names(exp_col), col = exp_col, lty = 1, lwd = 2, bty = "n")
Let’s try an alternative visualization, using this following function for box-plots:
boxplot2 <- function(x, eps, col, ...) {
boxplot(x[[3]] ~ x[[2]], at = unique(x[[2]]) + eps, add = TRUE, axes = FALSE,
boxwex = 2.5, col = adjustcolor(col, .5), outline = FALSE, ...)
}
Here is the alternative visualization:
plot_gene_all_farms <- function(gene, col = exp_col, eps = 1.5, h = 0,
transform = I, plot_frame = plot_frame2, plot_fct = boxplot2) {
tmp <- genes_standardized %>%
bind_rows() %>%
filter(SamplingDay != "Before") %>%
select(c("Group", "SamplingDay2", gene)) %>%
transform()
plot_frame(tmp[[2]], tmp[[3]])
control <- tmp %>%
filter(Group == "Control") %>%
arrange(SamplingDay2) # because boxplot() and vioplot() are not used with the formula option
points(jitter(control[[2]] - eps), control[[3]], col = col["control"])
plot_fct(control, -eps, col = col["control"])
treatment <- tmp %>%
filter(Group == "Treatment") %>%
arrange(SamplingDay2) # because boxplot() and vioplot() are not used with the formula option
points(jitter(treatment[[2]] + eps), treatment[[3]], col = col["treatment"])
plot_fct(treatment, eps, col = col["treatment"])
lines_title(gene, h)
}
Let’s try it:
plot_gene_all_farms(resistance_genes[1], h = 1)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(gene)` instead of `gene` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
legend("topright", legend = names(exp_col), fill = adjustcolor(exp_col, .5), bty = "n")
But a log scale might be more appropriate:
plot_gene_all_farms(resistance_genes[1], transform = transformation, plot_frame = plot_frame2log)
legend("bottomright", legend = names(exp_col), fill = adjustcolor(exp_col, .5), bty = "n")
Let’s try a violin plot instead, by using this function:
vioplot2 <- function(x, eps, color, ...) {
vioplot::vioplot(x[[3]] ~ x[[2]], at = unique(x[[2]]) + eps, add = TRUE,
axes = FALSE, fill = color, lineCol = color, border = color,
wex = 4, col = adjustcolor(color, .5), ...)
}
Let’s try plotting the data with the violin plot:
plot_gene_all_farms(resistance_genes[1], h = 1, plot_fct = vioplot2)
legend("topright", legend = names(exp_col), fill = adjustcolor(exp_col, .5), bty = "n")
plot_frame3 <- function(...) {
plot_frame(..., xlab = "time (days)",
ylab = "difference in standardized genes concentrations")
}
Let’s plot the difference between treatment and control for each farm. For that, we need the following function:
plot_differences <- function(gene) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", gene)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control)
tmp %>%
bind_rows() %$%
plot_frame3(SamplingDay2, difference)
walk(tmp, with, lines(SamplingDay2, difference, col = "green", lwd = 2))
lines_title(gene)
}
Let’s try it:
plot_differences(resistance_genes[1])
Let’s consider this function for box-plots and violin plots:
plot_differences <- function(gene, plot_fct = boxplot2) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", gene)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows() %>%
arrange(SamplingDay2) %>% # for boxplot() and vioplot()
select(Control, SamplingDay2, difference) # for boxplot2() and vioplot2()
with(tmp, plot_frame3(jitter(SamplingDay2), difference, col = "green", type = "p"))
plot_fct(tmp, 0, col = "green")
lines_title(gene)
}
Let’s try it:
plot_differences(resistance_genes[1], boxplot2)
Let’s look at the violin alternative:
plot_differences(resistance_genes[1], vioplot2)
A tuning of the polygon()
function:
polygon2 <- function(x, y1, y2, ...) {
polygon(c(x, rev(x)), c(y1, rev(y2)), ...)
}
A tuning of the arrows()
function:
arrows2 <- function(...) {
arrows(length = .1, angle = 90, code = 3, ...)
}
Let’s consider another option.
plot_differences_quantiles <- function(gene, data = genes,
col_lines = "steelblue3", col_area = "lightblue") {
tmp <- data %>%
map(extract, c("SamplingDay2", "Group", gene)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows() %>%
select(SamplingDay2, difference)
with(tmp, plot_frame3(jitter(SamplingDay2), difference, col = col_area, type = "p"))
x_val <- sort(unique(tmp$SamplingDay2))
tmp %>%
group_by(SamplingDay2) %>%
group_split() %>%
map(~ quantile(.x$difference, c(.25, .5, .75), na.rm = TRUE)) %>%
bind_rows() %>%
mutate(x_val = x_val) %>%
with({
polygon2(x_val, `25%`, `75%`,
col = adjustcolor(col_area, .2), border = col_lines, lty = 3)
points(x_val, `50%`, lwd = 2, col = col_lines)
arrows2(x_val, `25%`, x_val, `75%`, lwd = 2, col = col_lines)
lines(x_val, `50%`, lty = 2, col = col_lines)
})
lines_title(gene)
}
Let’s try it:
plot_differences_quantiles(resistance_genes[1])
Let’s plot all the genes:
opar <- par(mfrow = c(ceiling(length(resistance_genes) / ncols), ncols), plt = plt_val)
walk(resistance_genes, plot_differences_quantiles)
par(opar)
farm K08: Tetracycline + MLSB: here we want
The following function calculates the gene concentrations differences
between Treatment and Control, for each genes of an experiment and at
each time point. x
is the data of this experiment.
gene_conc_diff <- function(x) {
x %<>%
group_by(Group) %>%
group_split()
tmp <- x[[1]][[1]]
x %>%
map(select, -Group) %>%
rev() %>%
do.call(`-`, .) %>%
mutate(SamplingDay2 = tmp)
}
The following function calculates the quantiles of genes
concentration, across genes at for each time point. The data frame
x
has time values in the column SamplingDay2
and then genes concentrations by column.
gene_conc_diff_quant <- function(x) {
tmp <- x$SamplingDay2
x %>%
select(-SamplingDay2) %>%
apply(1, quantile, c(.25, .5, .75)) %>%
t() %>%
as.data.frame() %>%
mutate(SamplingDay2 = tmp)
}
The following function plots the data of some genes concentrations of an experiment:
plot_data <- function(x, col_lines = "steelblue3", col_area = "lightblue",
lwd_val = 2, add = FALSE, ...) {
if (add) {
plot_fct <- function(x, y, ...) {
points(jitter(x), y, col = col_area, ...)
}
} else {
plot_fct <- function(x, y, ...) {
plot_frame3(jitter(x), y, col = col_area, type = "p", ...)
}
}
x %>%
pivot_longer(-SamplingDay2) %$%
plot_fct(SamplingDay2, value, ...)
x %>%
gene_conc_diff_quant() %>%
with({
polygon2(SamplingDay2, `25%`, `75%`, col = adjustcolor(col_area, .2),
border = col_lines, lty = 3)
points(SamplingDay2, `50%`, lwd = lwd_val, col = col_lines)
arrows2(SamplingDay2, `25%`, SamplingDay2, `75%`, lwd = lwd_val, col = col_lines)
lines(SamplingDay2, `50%`, lty = lwd_val, col = col_lines)
})
}
The following function plot the data of a set ab
of
resistance genes in farm farm
:
plot_ab_set <- function(farm, ab, ...) {
genes_set <- classes %>%
filter(Antimicrobial_Class %in% ab) %>%
pull(EvaGreen_Name)
genes %>%
extract2(farm) %>%
extract(c("SamplingDay2", "Group", genes_set)) %>%
gene_conc_diff() %>%
plot_data(., ...)
}
Let’s generate the vector that contains the names of the antimicrobial classes used in the experiments:
ab_classes <- classes %>%
extract2("Antimicrobial_Class") %>%
unique() %>%
setdiff(c("Multidrug", "Other", "All"))
The following function calculates the y-axis range we should choose for a given farm:
y_range <- function(farm) {
genes_set <- classes %>%
filter(Antimicrobial_Class %in% ab_classes) %>%
pull(EvaGreen_Name)
genes %>%
extract2(farm) %>%
extract(c("SamplingDay2", "Group", genes_set)) %>%
gene_conc_diff() %>%
select(-SamplingDay2) %>%
range()
}
Parameters:
lwd_v <- 2
farm <- "K08"
ab1 <- "Tetracycline"
ab2 <- "MLSB"
Separated:
plot_ab_set(farm, ab1,
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, ab2,
col_lines = "indianred1", col_area = "darksalmon", lwd_val = lwd_v, add = TRUE)
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
abline(h = 0)
legend("bottomright", legend = c(ab1, ab2, "others"),
fill = c("steelblue3" , "indianred1", "grey"), bty = "n")
Combined:
plot_ab_set(farm, c(ab1, ab2),
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
abline(h = 0)
legend("bottomright", legend = c(paste(ab1, ab2, sep = " and "), "others"),
fill = c("steelblue3" , "grey"), bty = "n")
Parameters:
lwd_v <- 2
farm <- "K09"
ab1 <- "Tetracycline"
ab2 <- "Polymyxin"
Separated:
plot_ab_set(farm, ab1,
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, ab2,
col_lines = "indianred1", col_area = "darksalmon", lwd_val = lwd_v, add = TRUE)
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
abline(h = 0)
legend("bottomright", legend = c(ab1, ab2, "others"),
fill = c("steelblue3" , "indianred1", "grey"), bty = "n")
Combined:
plot_ab_set(farm, c(ab1, ab2),
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
abline(h = 0)
legend("bottomright", legend = c(paste(ab1, ab2, sep = " and "), "others"),
fill = c("steelblue3" , "grey"), bty = "n")
Parameters:
lwd_v <- 2
farm <- "K11"
ab1 <- "Beta-Lactam"
ab2 <- "Aminoglycoside"
Separated:
plot_ab_set(farm, ab1,
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
plot_ab_set(farm, ab2,
col_lines = "indianred1", col_area = "darksalmon", lwd_val = lwd_v, add = TRUE)
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
abline(h = 0)
legend("top", legend = c(ab1, ab2, "others"),
fill = c("steelblue3" , "indianred1", "grey"), bty = "n")
Combined:
plot_ab_set(farm, c(ab1, ab2),
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
abline(h = 0)
legend("top", legend = c(paste(ab1, ab2, sep = " and "), "others"),
fill = c("steelblue3" , "grey"), bty = "n")
Parameters:
lwd_v <- 2
farm <- "K12"
ab1 <- "Beta-Lactam"
ab2 <- "Aminoglycoside"
Separated:
plot_ab_set(farm, ab1,
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
plot_ab_set(farm, ab2,
col_lines = "indianred1", col_area = "darksalmon", lwd_val = lwd_v, add = TRUE)
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
abline(h = 0)
legend("bottomright", legend = c(ab1, ab2, "others"),
fill = c("steelblue3" , "indianred1", "grey"), bty = "n")
Combined:
plot_ab_set(farm, c(ab1, ab2),
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
abline(h = 0)
legend("bottomright", legend = c(paste(ab1, ab2, sep = " and "), "others"),
fill = c("steelblue3" , "grey"), bty = "n")
Parameters:
lwd_v <- 2
farm <- "K13"
ab1 <- "Beta-Lactam"
ab2 <- "Polymyxin"
ab3 <- "MLSB"
ab4 <- "Tetracycline"
Combined:
plot_ab_set(farm, c(ab1, ab2, ab3, ab4),
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2, ab3, ab4)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
abline(h = 0)
legend("bottomright",
legend = c(paste(paste(ab1, ab2, ab3, sep = ", "), ab2, sep = " and "), "others"),
fill = c("steelblue3" , "grey"), bty = "n")
Parameters:
lwd_v <- 2
farm <- "K19"
ab1 <- "Quinolone"
Separated:
plot_ab_set(farm, ab1,
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, setdiff(ab_classes, ab1),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
abline(h = 0)
legend("bottomright", legend = c(ab1, "others"),
fill = c("steelblue3" , "grey"), bty = "n")
Parameters:
lwd_v <- 2
farm <- "K22"
ab1 <- "Tetracycline"
ab2 <- "Polymyxin"
Separated:
plot_ab_set(farm, ab1,
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
plot_ab_set(farm, ab2,
col_lines = "indianred1", col_area = "darksalmon", lwd_val = lwd_v, add = TRUE)
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
abline(h = 0)
legend("topright", legend = c(ab1, ab2, "others"),
fill = c("steelblue3" , "indianred1", "grey"), bty = "n")
Combined:
plot_ab_set(farm, c(ab1, ab2),
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
plot_ab_set(farm, setdiff(ab_classes, c(ab1, ab2)),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
## Warning in arrows(length = 0.1, angle = 90, code = 3, ...): zero-length arrow is
## of indeterminate angle and so skipped
abline(h = 0)
legend("topright", legend = c(paste(ab1, ab2, sep = " and "), "others"),
fill = c("steelblue3" , "grey"), bty = "n")
Parameters:
lwd_v <- 2
farm <- "K25"
ab1 <- "Quinolone"
Separated:
plot_ab_set(farm, ab1,
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, setdiff(ab_classes, ab1),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
abline(h = 0)
legend("bottomright", legend = c(ab1, "others"),
fill = c("steelblue3" , "grey"), bty = "n")
Parameters:
lwd_v <- 2
farm <- "K26"
ab1 <- "Tetracycline"
Separated:
plot_ab_set(farm, ab1,
col_lines = "steelblue3", col_area = "lightblue", lwd_val = lwd_v, ylim = y_range(farm))
plot_ab_set(farm, setdiff(ab_classes, ab1),
col_lines = "grey", col_area = "grey", lwd_val = lwd_v, add = TRUE)
abline(h = 0)
legend("bottomright", legend = c(ab1, "others"),
fill = c("steelblue3" , "grey"), bty = "n")
The following function calculates the difference between treatment
and control for a given gene gene
for all the farms:
calc_differences <- function(gene) {
genes %>%
map(extract, c("SamplingDay2", "Group", gene)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows(.id = "farm") %>%
select(-Control, -Treatment) %>%
setNames(c("farm", "SamplingDay2", gene))
}
Calculating genes differences between treatment and control, across farms:
differences <- resistance_genes %>%
map(calc_differences) %>%
reduce(left_join, by = c("farm", "SamplingDay2"))
Let’s do a principal component analysis (PCA) on these differences:
pca <- differences %>%
filter(if_all(.fns = Negate(is.na))) %>%
select(-farm, -SamplingDay2) %>%
t() %>%
as.data.frame() %>%
select(where(~ any(. != 0))) %>%
prcomp(scale. = TRUE)
Preparing the PCA data to plot:
# a hash table that converts an antimicrobial name into a class name:
hash1 <- classes %>%
filter(Antimicrobial_Class != "All") %>%
unique() %$%
setNames(Antimicrobial_Class, EvaGreen_Name)
# a hash table that converts a class name into a color:
hash2 <- setNames(brewer.pal(11, "Set3"), unique(hash1))
# PC1 and PC2 coordinates of the PCA:
xs <- pca$x
xs %<>%
as.data.frame() %>%
select(PC1, PC2) %>%
mutate(class = hash1[rownames(xs)],
color = hash2[class])
# adding the percentage of variance as metadata:
variance <- pca$sdev^2
variance <- variance / sum(variance)
attr(xs, "percvar") <- round(100 * variance[1:2])
A function that plots the data points of the first two components of the PCA, highlighting a given class of antimicrobials:
pca_biplot <- function(abclass, xs) {
percvar <- attr(xs, "percvar")
with(xs, plot(PC1, PC2, main = abclass,
xlab = paste0("PC1 (", percvar[1], "%)"),
ylab = paste0("PC2 (", percvar[2], "%)")))
xs %>%
filter(class == abclass) %$%
points(PC1, PC2, col = "red", pch = 19)
abline(v = 0, lty = 2, col = "grey")
abline(h = 0, lty = 2, col = "grey")
}
The number of genes per antimicrobial classes:
(ordered_classes <- classes %>%
filter(Antimicrobial_Class != "All") %>%
unique() %>%
group_by(Antimicrobial_Class) %>%
tally() %>%
arrange(desc(n)))
## # A tibble: 11 × 2
## Antimicrobial_Class n
## <chr> <int>
## 1 Multidrug 18
## 2 Aminoglycoside 14
## 3 Beta-Lactam 14
## 4 MLSB 7
## 5 Sulfonamide 6
## 6 Tetracycline 6
## 7 Other 5
## 8 Polymyxin 4
## 9 Quinolone 3
## 10 Glycopeptide 2
## 11 Phenicol 1
classes_to_plot <- ordered_classes %>%
filter(n > 5) %>%
pull(Antimicrobial_Class)
The number of columns we want:
ncols <- 2
plt_val <- c(.13, .92, .2, .85)
Plotting the genes from the classes with the highest number of genes:
opar <- par(mfrow = c(ceiling(length(classes_to_plot) / ncols), ncols), plt = plt_val)
walk(classes_to_plot, pca_biplot, xs)
par(opar)
There is a group of genes that behave very similarly:
(tmp <- filter(xs, PC1 < -3.8, PC2 > 5))
## PC1 PC2 class color
## fosB -4.137040 5.142624 Other #FB8072
## fox5 -4.133710 5.136227 Beta-Lactam #80B1D3
## mcr-2 -4.140431 5.142062 Polymyxin #BEBADA
## mcr-3 -4.126140 5.148737 Polymyxin #BEBADA
## oprD -4.135513 5.128459 Multidrug #FFFFB3
## oqxA -4.068914 5.065868 Multidrug #FFFFB3
## qacA -4.141751 5.142154 Multidrug #FFFFB3
## qacC -4.127516 5.045876 Multidrug #FFFFB3
## qnrA -4.000327 5.139648 Quinolone #D9D9D9
## vanA -4.140156 5.144044 Glycopeptide #CCEBC5
## vanB -4.121341 5.119012 Glycopeptide #CCEBC5
## vatA -4.139672 5.142572 MLSB #FCCDE5
But that seems to be not interesting:
ncols <- 4
opar <- par(mfrow = c(ceiling(nrow(tmp) / ncols), ncols), plt = plt_val)
walk(rownames(tmp), plot_differences_quantiles)
par(opar)
Here we basically reuse the code of the previous subsection. The
function calc_relvar()
basically replaces the function
calc_differences()
of the previous subsection:
calc_relvar <- function(gene) {
genes %>%
map(extract, c("SamplingDay2", "Group", gene)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate_at, c("Control", "Treatment"), scale) %>%
map(pivot_longer, -1, names_to = "group", values_to = gene) %>%
bind_rows(.id = "farm")
}
The function pc1pc2()
packages part of the code of the
previous subsection:
pc1pc2 <- function(data) {
# the PCA:
pca <- data %>%
mutate_all(~ replace(.x, is.nan(.x), 0)) %>% # because scaling may generate NaNs
filter(if_all(.fns = Negate(is.na))) %>%
select(-farm, -SamplingDay2, -group) %>% ###
t() %>%
as.data.frame() %>%
select(where(~ any(. != 0))) %>%
prcomp(scale. = TRUE)
# PC1 and PC2 coordinates of the PCA:
xs <- pca$x
xs %<>%
as.data.frame() %>%
select(PC1, PC2) %>%
mutate(class = hash1[rownames(xs)],
color = hash2[class])
# adding the percentage of variance as metadata:
variance <- pca$sdev^2
variance <- variance / sum(variance)
attr(xs, "percvar") <- round(100 * variance[1:2])
# returns value:
xs
}
Computing the coordinates on the first 2 components of the PCA:
xs2 <- resistance_genes %>%
map(calc_relvar) %>%
reduce(left_join, by = c("farm", "SamplingDay2", "group")) %>%
pc1pc2()
Plotting the genes from the classes with the highest number of genes:
ncols <- 2
opar <- par(mfrow = c(ceiling(length(classes_to_plot) / ncols), ncols), plt = plt_val)
walk(classes_to_plot, pca_biplot, xs2)
par(opar)
There is a group of genes that behave very similarly:
(tmp <- filter(xs2, PC1 < -11))
## PC1 PC2 class color
## acrA -13.92566 0.50931391 Multidrug #FFFFB3
## acrB -13.37104 0.14218931 Multidrug #FFFFB3
## acrF -13.59134 0.30150631 Multidrug #FFFFB3
## arnA -13.44902 1.13768864 Polymyxin #BEBADA
## blaAMPC -13.37190 -0.46053204 Beta-Lactam #80B1D3
## blaTEM -13.16071 0.99927524 Beta-Lactam #80B1D3
## macB -13.56327 0.03226216 MLSB #FCCDE5
## mdtF -13.84306 0.82656416 Multidrug #FFFFB3
## mdtL -13.41474 0.78639676 Multidrug #FFFFB3
## mdtO -13.46817 0.69910840 Multidrug #FFFFB3
## tolC -13.60715 0.45913852 Multidrug #FFFFB3
Not really clear how similar they are:
ncols <- 4
opar <- par(mfrow = c(ceiling(nrow(tmp) / ncols), ncols), plt = plt_val)
walk(rownames(tmp), plot_gene_all_farms, plot_frame = plot_frame2log)
par(opar)
percvar <- attr(xs, "percvar")
with(xs, plot(PC1, PC2,
xlab = paste0("PC1 (", percvar[1], "%)"),
ylab = paste0("PC2 (", percvar[2], "%)")))
abline(v = 0, lty = 2, col = "grey")
abline(h = 0, lty = 2, col = "grey")
xs %>%
tibble::rownames_to_column() %>%
filter(rowname %in% rownames(filter(xs, PC1 < -3.8, PC2 > 5))) %$%
points(PC1, PC2, col = "red", pch = 19)
xs %>%
tibble::rownames_to_column() %>%
filter(rowname %in% rownames(filter(xs2, PC1 < -11))) %$%
points(PC1, PC2, col = "blue", pch = 19)
And the opposite:
percvar <- attr(xs2, "percvar")
with(xs2, plot(PC1, PC2,
xlab = paste0("PC1 (", percvar[1], "%)"),
ylab = paste0("PC2 (", percvar[2], "%)")))
abline(v = 0, lty = 2, col = "grey")
abline(h = 0, lty = 2, col = "grey")
xs2 %>%
tibble::rownames_to_column() %>%
filter(rowname %in% rownames(filter(xs, PC1 < -3.8, PC2 > 5))) %$%
points(PC1, PC2, col = "red", pch = 19)
xs2 %>%
tibble::rownames_to_column() %>%
filter(rowname %in% rownames(filter(xs2, PC1 < -11))) %$%
points(PC1, PC2, col = "blue", pch = 19)
tmp <- differences %>%
filter(if_all(.fns = Negate(is.na))) %>%
select(-farm, -SamplingDay2) %>%
t() %>%
as.data.frame() %>%
select(where(~ any(. != 0)))
km <- kmeans(tmp, centers = 11, iter.max = 1000, nstart = 50)
fviz_cluster(km, data = tmp)
library(factoextra)
library(NbClust)
fviz_nbclust(tmp, kmeans, method = "wss")
fviz_nbclust(tmp, kmeans, method = "silhouette")
fviz_nbclust(tmp, kmeans, method = "gap_stat")
NbClust(data = NULL, diss = NULL, distance = "euclidean",
min.nc = 2, max.nc = 20, method = "kmeans")
NbClust(data = tmp, min.nc = 2, max.nc = 20, method = "kmeans")