---
title: "26-visualizations"
output: html_document
date: "2024-11-18"
---
Rmd to make visualizations for paper.
https://yulab-smu.top/biomedical-knowledge-mining-book/enrichplot.html
```{r}
library(ggplot2)
library(tidyverse)
library(dplyr)
library(RColorBrewer)
```
# Make a figure of the enriched GO Processes
Read in the list of 9 top enriched processes:
```{r}
top9 <- read.delim("../analyses/25-compare-2021-2022/2021-2022-entichedtop9_DAVID_separated.txt")
head(top9)
```
Goal: plots of the terms, x-axis is counts and color is benjamini value.
```{r}
# Basic barplot
p <- ggplot(data=top9, aes(x=Fold.Enrichment, y=Term, fill=Count)) +
geom_bar(stat="identity")+theme_minimal()
p
# Rename y-axis:
# Horizontal bar plot
p + coord_flip()
```
# Make a Venn Diagram of the DEGs for 2021, 2022, and the overlap... is this just numbers??
Total DEGS for 2021 --> 6938
Total amount unique to 2021 --> (6938-4114 =) 2824
Total DEGs for 2022 --> 6237
Total amount unique to 2022 --> (6237-4114 =) 2123
```{r}
#install.packages("ggVennDiagram")
library(ggVennDiagram)
```
```{r}
#install.packages("VennDiagram")
library(VennDiagram)
```
```{r}
# create pairwise Venn diagram
draw.pairwise.venn(area1=6938, area2=6237,cross.area=4114,
category=c("A","B"),fill=c("Red","Yellow"))
# 2D Venn diagram
ggVennDiagram(x)
```
<<<<<<< HEAD
# Disease Progression and Mortality figure:
Focusing on just the observations for the n=16 stars in 2021 (Exp A) and n=12 stars in 2022 (Exp B)
https://cran.r-project.org/web/packages/vistime/vignettes/gg_vistime-vignette.html \
https://shosaco.github.io/vistime/
Goal -- timeline color-coded horizontal stacked bar...
```{r}
#install.packages("vistime")
#install.packages("cowplot")
```
```{r}
### Load packages
library("vistime")
library("tidyverse")
library("RColorBrewer")
library("scales")
library("cowplot")
```
load in data
```{r}
data <- read.csv("../analyses/26-visualizations/expA-phenotype-data-for-figure.csv")
head(data)
```
https://github.com/wlhamilton/Patient-ward-movement-timelines/blob/main/R%20script%20to%20generate%20example%20ward%20movement%20plot.R
```{r}
### Preparing for plot
## Colours
# Define number of colours needed
cols_n <- as.numeric(n_distinct(data$Disease_Sign))
# Check we have enough colours
ifelse(cols_n>12,
"More than 12 wards - not enough colours!",
"12 wards or fewer - using Set3 colours")
# Select the colours from Set3 palette in RColorBrewer
cols_to_use <- brewer.pal(n = cols_n, name = "Set3")
```
```{r}
# Create mapping of colours to wards
col_sign_mapping <- data.frame(Disease_Sign=unique(c(as.character(data$Disease_Sign))), color=cols_to_use)
# merge in the mapping to the df
data_2 <- merge(data,
col_sign_mapping,
by="Disease_Sign",
all.x=T,all.y=T) %>%
select(Sample_ID, Sample_Date, Disease_Sign, start, end, color, Treatment)
data_2
```
```{r}
## Extract sample dates
sample_dates <- data_2 %>%
select(Sample_ID, Sample_Date) %>%
distinct(Sample_ID, .keep_all=TRUE) %>%
arrange(Sample_Date)
```
```{r}
### Plotting
# Produce the basic plot
plot_data <- gg_vistime(data = data_2,
col.group = "Sample_ID", # Each row will be a sample_ID - proxy for unique star
col.event = "Disease_Sign", # Rows will be coloured by the disease-sign
show_labels = FALSE, # Remove labels indicating the disease sign
axis.ticks.x = FALSE,
axis.ticks.y = FALSE)
plot_data
```
https://stackoverflow.com/questions/40598672/adding-two-y-axis-titles-on-the-same-axis
```{r}
# Tweak the plot
plot_data <- plot_data + theme_bw() +
ggplot2::theme(
plot.title = element_text(size=24),
axis.text.x = element_text(size = 12, color = "black", angle = 30, vjust = 1, hjust = 1),
axis.text.y = element_text(size = 12, color = "black")) +
scale_x_datetime(breaks = breaks_width("1 days"), labels = date_format("%b %d"))
plot_data
```
```{r}
# Adding date of sample - based on dates that can be found in the object "sample_dates" we made above
plot_data <- plot_data +
annotate("point", x = as.POSIXct(sample_dates[13,2]), y = 31, size = 1, colour = "black") + #78
annotate("point", x = as.POSIXct(sample_dates[11,2]), y = 29, size = 1, colour = "black") + #75
annotate("point", x = as.POSIXct(sample_dates[6,2]), y = 27, size = 1, colour = "black") + #71
annotate("point", x = as.POSIXct(sample_dates[7,2]), y = 25, size = 1, colour = "black") + #67
annotate("point", x = as.POSIXct(sample_dates[1,2]), y = 23, size = 1, colour = "black") + #59
annotate("point", x = as.POSIXct(sample_dates[8,2]), y = 21, size = 1, colour = "black") + #69
annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 19, size = 1, colour = "black") + #57
annotate("point", x = as.POSIXct(sample_dates[15,2]), y = 17, size = 1, colour = "black") + #83
annotate("point", x = as.POSIXct(sample_dates[3,2]), y = 15, size = 1, colour = "black") + #56
annotate("point", x = as.POSIXct(sample_dates[16,2]), y = 13, size = 1, colour = "black") + #81
annotate("point", x = as.POSIXct(sample_dates[14,2]), y = 11, size = 1, colour = "black") + #76
annotate("point", x = as.POSIXct(sample_dates[4,2]), y = 9, size = 1, colour = "black") + #52
annotate("point", x = as.POSIXct(sample_dates[9,2]), y = 7, size = 1, colour = "black") + #61
annotate("point", x = as.POSIXct(sample_dates[5,2]), y = 5, size =1, colour = "black") + #54
annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 3, size =1, colour = "black") + #73
annotate("point", x = as.POSIXct(sample_dates[10,2]), y = 1, size =1, colour = "black") #64
plot_data
```
```{r}
### Create a legend
data_legend <- data_2 %>%
distinct(Disease_Sign, .keep_all=T) %>%
arrange(Disease_Sign)
data_legend$start <- as.Date("2021-10-05")
data_legend$end <- as.Date("2021-10-18")
data_legend$Sample_ID <- "Key"
data_legend
plot_legend <- gg_vistime(data = data_legend,
col.group = "Sample_ID",
col.event = "Disease_Sign",
show_labels = TRUE,
linewidth = 10)
plot_legend
```
```{r}
# Tweak the legend plot
plot_legend <- plot_legend + theme_void() +
ggplot2::theme(
plot.title = element_text(size=11),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
plot_legend
```
```{r}
### Combine the main plot and legend into a single figure
plot_combined <- plot_grid(plot_data, plot_legend,
rel_widths = c(1, 0.15))
plot_combined
```
```{r}
### Save plot
ggplot2::ggsave(plot_combined, file = "../analyses/26-visualizations/ExpA_disease-progression-phenotype-timeline.pdf", dpi=300, height=4, width=7, units="in")
```
# Disease Progression Timeline for Experiment B
```{r}
### Load packages
library("vistime")
library("tidyverse")
library("RColorBrewer")
library("scales")
library("cowplot")
```
load in the data
```{r}
datab <- read.csv("../analyses/26-visualizations/expB-phenotype-data-for-figure.csv")
head(datab)
```
```{r}
### Preparing for plot
## Colours
# Define number of colours needed
cols_n <- as.numeric(n_distinct(datab$Disease_Sign))
# Check we have enough colours
ifelse(cols_n>12,
"More than 12 wards - not enough colours!",
"12 wards or fewer - using Set3 colours")
# Select the colours from Set3 palette in RColorBrewer
cols_to_use <- brewer.pal(n = cols_n, name = "Set3")
```
```{r}
# Create mapping of colours to wards
col_sign_mapping <- data.frame(Disease_Sign=unique(c(as.character(datab$Disease_Sign))), color=cols_to_use)
# merge in the mapping to the df
datab_2 <- merge(datab,
col_sign_mapping,
by="Disease_Sign",
all.x=T,all.y=T) %>%
select(Sample_ID, Sample_Date, Disease_Sign, start, end, color, Treatment)
datab_2
```
```{r}
## Extract sample dates
sample_dates <- datab_2 %>%
select(Sample_ID, Sample_Date) %>%
distinct(Sample_ID, .keep_all=TRUE) %>%
arrange(Sample_Date)
```
```{r}
### Plotting
# Produce the basic plot
plot_data <- gg_vistime(data = datab_2,
col.group = "Sample_ID", # Each row will be a sample_ID - proxy for unique star
col.event = "Disease_Sign", # Rows will be coloured by the disease-sign
show_labels = FALSE, # Remove labels indicating the disease sign
axis.ticks.x = FALSE,
axis.ticks.y = FALSE)
plot_data
```
```{r}
# Tweak the plot
plot_data <- plot_data + theme_bw() +
ggplot2::theme(
plot.title = element_text(size=24),
axis.text.x = element_text(size = 12, color = "black", angle = 30, vjust = 1, hjust = 1),
axis.text.y = element_text(size = 12, color = "black")) +
scale_x_datetime(breaks = breaks_width("1 days"), labels = date_format("%b %d"))
plot_data
```
```{r}
# Adding date of sample - based on dates that can be found in the object "sample_dates" we made above
plot_data <- plot_data +
annotate("point", x = as.POSIXct(sample_dates[1,2]), y = 23, size = 1, colour = "black") + #174
annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 21, size = 1, colour = "black") + #190
annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 19, size = 1, colour = "black") + #231
annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 17, size = 1, colour = "black") + #187
annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 15, size = 1, colour = "black") + #188
annotate("point", x = as.POSIXct(sample_dates[9,2]), y = 13, size = 1, colour = "black") + #228
annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 11, size = 1, colour = "black") + #186
annotate("point", x = as.POSIXct(sample_dates[5,2]), y = 9, size = 1, colour = "black") + #203
annotate("point", x = as.POSIXct(sample_dates[8,2]), y = 7, size = 1, colour = "black") + #209
annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 5, size =1, colour = "black") + #177
annotate("point", x = as.POSIXct(sample_dates[10,2]), y = 3, size =1, colour = "black") + #219
annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 1, size =1, colour = "black") #230
plot_data
```
```{r}
### Create a legend
data_legend <- datab_2 %>%
distinct(Disease_Sign, .keep_all=T) %>%
arrange(Disease_Sign)
data_legend$start <- as.Date("2021-10-05")
data_legend$end <- as.Date("2021-10-18")
data_legend$Sample_ID <- "Key"
data_legend
plot_legend <- gg_vistime(data = data_legend,
col.group = "Sample_ID",
col.event = "Disease_Sign",
show_labels = TRUE,
linewidth = 10)
plot_legend
```
```{r}
# Tweak the legend plot
plot_legend <- plot_legend + theme_void() +
ggplot2::theme(
plot.title = element_text(size=11),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
plot_legend
```
```{r}
### Combine the main plot and legend into a single figure
plot_combined <- plot_grid(plot_data, plot_legend,
rel_widths = c(1, 0.15))
plot_combined
```
```{r}
### Save plot
ggplot2::ggsave(plot_combined, file = "../analyses/26-visualizations/ExpB_disease-progression-phenotype-timeline.pdf", dpi=300, height=4, width=7, units="in")
```
---
# Make a PCA of RNAseq data for both experiments combined, pre-DEG analyses
Color code by Experiment (A 2021; B 2022) and exposed and control, and also add in juvenile/adult?? make an option with/without the age class for Exp B
Read in count matrices
```{r}
expAcounts <- read.csv("../data/gene_count_matrix_2021.csv")
head(expAcounts)
```
```{r}
expBcounts <- read.csv("../data/gene_count_matrix_2022.csv")
head(expBcounts)
```
combine the two tables:
```{r}
expABcounts <- left_join(expAcounts, expBcounts, by="gene_id")
head(expABcounts)
```
only keep libraries that we did comparisons for:
```{r}
expABcounts_sm <- select(expABcounts, "gene_id", "PSC.56", "PSC.52", "PSC.54", "PSC.61", "PSC.64", "PSC.73", "PSC.76", "PSC.81", "PSC.59", "PSC.57", "PSC.69", "PSC.67", "PSC.71", "PSC.75", "PSC.78", "PSC.83", "PSC.0228", "PSC.0187", "PSC.0188", "PSC.0174", "PSC.0190", "PSC.0231", "PSC.0230", "PSC.0219", "PSC.0177", "PSC.0186", "PSC.0209", "PSC.0203")
```
Make a PCA:
```{r}
#install.packages("ggfortify")
```
```{r}
library(ggfortify)
library(plotly)
```
```{r}
expABcounts_transpose = t(expABcounts_sm)
```
make first row into column names:
```{r}
colnames(expABcounts_transpose)=expABcounts_transpose[c(1),]
```
remove first row:
```{r}
expABcounts_transpose=expABcounts_transpose[-c(1), ]
```
make it a dataframe
```{r}
expABcounts_transpose <- as.data.frame(expABcounts_transpose)
```
make rownames a column called SampleID
```{r}
library(tibble)
expABcounts_transpose <- tibble::rownames_to_column(expABcounts_transpose, "Sample_ID")
```
make the sample_ID into rownames:
```{r}
#expABcounts_transpose <- data.frame(expABcounts_transpose, row.names = 1)
```
```{r}
library(dplyr)
expABcounts_transpose <- expABcounts_transpose %>% mutate_at(2:23465, as.numeric)
```
change Sample_ID s into descriptions
add a column that tells the treatment group of each sample:
```{r}
expABcounts_transpose$Treatment <- c("Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Juvenile", "Exposed_Juvenile", "Exposed_Juvenile", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Control_Juvenile", "Control_Juvenile", "Control_Juvenile", "Control_Adult", "Control_Adult", "Control_Adult")
```
```{r}
expABcounts_trtmnt <- as.data.frame(expABcounts_trtmnt)
```
```{r}
df_nums <- expABcounts_transpose[2:23465]
pca <- prcomp(df_nums)
p <- autoplot(pca, data = expABcounts_transpose, colour = 'Treatment', frame = TRUE)
ggplotly(p)
```
# create a plot showing the average days to death from experiment B comparing adults and juveniles
| star_id | age | days_to_death |
|---------|----------|---------------|
| 3 | adult | 12 |
| 5 | adult | 13 |
| 6 | adult | 14 |
| 10 | adult | 13 |
| 13 | adult | 17 |
| 18 | adult | 14 |
| 22 | adult | 14 |
| 28 | adult | 16 |
| 30 | adult | 14 |
| 37 | juvenile | 11 |
| 38 | juvenile | 15 |
| 39 | juvenile | 12 |
| 44 | juvenile | 11 |
| 46 | juvenile | 12 |
| 47 | juvenile | 11 |
| 50 | juvenile | 12 |
| 54 | juvenile | 11 |
| 57 | juvenile | 13 |
| 61 | juvenile | 15 |
I want the plot to have the y-axis be days to death
X -axis is two groups: juveniles and adults
the plot will show two points -- one for juveniles at the average day to death with error bars, and one for adults with average day to death with error bars
https://statdoe.com/step-by-step-scatterplot-for-one-factor-in-r/
```{r}
# loading the appropriate libraries
library(readr)
library(ggplot2)
# loading and checking the data
mortexpb <- read_csv("../analyses/26-visualizations/mortality_expB_data.csv")
print(mortexpb)
```
```{r}
# scatterplot
options(repr.plot.width=2, repr.plot.height=4)
ggplot(mortexpb, aes(age, average_days_to_death)) +
geom_point(size = 5) +
geom_errorbar(aes(ymin=average_days_to_death-sd, ymax=average_days_to_death+sd), width = 1) +
labs(x="Age", y="Days To Mortality Post-Exposure") +
ylim(0, 20) +
theme_bw(base_size = 18) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
```
```{r}
# saving the final figure
ggsave("../analyses/26-visualizations/ExpB_mort_scatterplot.png", width = 4, height = 2.5, dpi = 1000)
```
## Make a PCA for both experiments' raw RNAseq data, grouping by treatments for all experiments
Read in count matrices
```{r}
expAcounts <- read.csv("../data/gene_count_matrix_2021.csv")
head(expAcounts)
```
```{r}
expBcounts <- read.csv("../data/gene_count_matrix_2022.csv")
head(expBcounts)
```
combine the two tables:
```{r}
expABcounts <- left_join(expAcounts, expBcounts, by="gene_id")
head(expABcounts)
```
only keep libraries that we did comparisons for:
```{r}
expABcounts_sm <- select(expABcounts, "gene_id", "PSC.56", "PSC.52", "PSC.54", "PSC.61", "PSC.64", "PSC.73", "PSC.76", "PSC.81", "PSC.59", "PSC.57", "PSC.69", "PSC.67", "PSC.71", "PSC.75", "PSC.78", "PSC.83", "PSC.0228", "PSC.0187", "PSC.0188", "PSC.0174", "PSC.0190", "PSC.0231", "PSC.0230", "PSC.0219", "PSC.0177", "PSC.0186", "PSC.0209", "PSC.0203")
```
Make a PCA:
```{r}
#install.packages("ggfortify")
```
```{r}
library(ggfortify)
library(plotly)
```
```{r}
expABcounts_transpose = t(expABcounts_sm)
```
make first row into column names:
```{r}
colnames(expABcounts_transpose)=expABcounts_transpose[c(1),]
```
remove first row:
```{r}
expABcounts_transpose=expABcounts_transpose[-c(1), ]
```
make it a dataframe
```{r}
expABcounts_transpose <- as.data.frame(expABcounts_transpose)
```
make rownames a column called SampleID
```{r}
library(tibble)
expABcounts_transpose <- tibble::rownames_to_column(expABcounts_transpose, "Sample_ID")
```
make the sample_ID into rownames:
```{r}
#expABcounts_transpose <- data.frame(expABcounts_transpose, row.names = 1)
```
```{r}
library(dplyr)
expABcounts_transpose <- expABcounts_transpose %>% mutate_at(2:23465, as.numeric)
```
change Sample_ID s into descriptions
add a column that tells the treatment group of each sample:
```{r}
expABcounts_transpose$Treatment <- c("Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge")
```
```{r}
expABcounts_transpose <- as.data.frame(expABcounts_tra)
```
```{r}
df_nums <- expABcounts_transpose[2:23465]
pca <- prcomp(df_nums)
p <- autoplot(pca, data = expABcounts_transpose, colour = 'Treatment', frame = TRUE)
ggplotly(p)
```