---
title: "26-visualizations"
output: html_document
date: "2024-11-18"
---
Rmd to make visualizations for paper. 

https://yulab-smu.top/biomedical-knowledge-mining-book/enrichplot.html 

```{r}
library(ggplot2)
library(tidyverse)
library(dplyr)
library(RColorBrewer)
```


# Make a figure of the enriched GO Processes

Read in the list of 9 top enriched processes:
```{r}
top9 <- read.delim("../analyses/25-compare-2021-2022/2021-2022-entichedtop9_DAVID_separated.txt")
head(top9)
```
Goal: plots of the terms, x-axis is counts and color is benjamini value. 


```{r}

# Basic barplot
p <- ggplot(data=top9, aes(x=Fold.Enrichment, y=Term, fill=Count)) +
  geom_bar(stat="identity")+theme_minimal()
p

# Rename y-axis:


# Horizontal bar plot
p + coord_flip()
```

# Make a Venn Diagram of the DEGs for 2021, 2022, and the overlap... is this just numbers??

Total DEGS for 2021 --> 6938
Total amount unique to 2021 --> (6938-4114 =) 2824
Total DEGs for 2022 --> 6237
Total amount unique to 2022 --> (6237-4114 =) 2123
```{r}
#install.packages("ggVennDiagram")
library(ggVennDiagram)
```
```{r}
#install.packages("VennDiagram")
library(VennDiagram)
```


```{r}
# create pairwise Venn diagram 
draw.pairwise.venn(area1=6938, area2=6237,cross.area=4114, 
                   category=c("A","B"),fill=c("Red","Yellow"))
# 2D Venn diagram
ggVennDiagram(x) 
```

<<<<<<< HEAD
# Disease Progression and Mortality figure: 
Focusing on just the observations for the n=16 stars in 2021 (Exp A) and n=12 stars in 2022 (Exp B) 

https://cran.r-project.org/web/packages/vistime/vignettes/gg_vistime-vignette.html \    
https://shosaco.github.io/vistime/ 

 Goal -- timeline color-coded horizontal stacked bar... 

```{r}
#install.packages("vistime")
#install.packages("cowplot")
```


```{r}
### Load packages
library("vistime")
library("tidyverse")
library("RColorBrewer")
library("scales")
library("cowplot")
```

load in data
```{r}
data <- read.csv("../analyses/26-visualizations/expA-phenotype-data-for-figure.csv")
head(data)
```
https://github.com/wlhamilton/Patient-ward-movement-timelines/blob/main/R%20script%20to%20generate%20example%20ward%20movement%20plot.R 

```{r}
### Preparing for plot
## Colours
# Define number of colours needed
cols_n <- as.numeric(n_distinct(data$Disease_Sign))
# Check we have enough colours
ifelse(cols_n>12, 
       "More than 12 wards - not enough colours!",
       "12 wards or fewer - using Set3 colours")
# Select the colours from Set3 palette in RColorBrewer
cols_to_use <- brewer.pal(n = cols_n, name = "Set3")
```


```{r}
# Create mapping of colours to wards
col_sign_mapping <- data.frame(Disease_Sign=unique(c(as.character(data$Disease_Sign))), color=cols_to_use)
# merge in the mapping to the df
data_2 <- merge(data,
                      col_sign_mapping,
                      by="Disease_Sign",
                      all.x=T,all.y=T) %>%
  select(Sample_ID, Sample_Date, Disease_Sign, start, end, color, Treatment)
data_2
```

```{r}
## Extract sample dates
sample_dates <- data_2 %>%
  select(Sample_ID, Sample_Date) %>%
  distinct(Sample_ID, .keep_all=TRUE) %>%
  arrange(Sample_Date)
```

```{r}
### Plotting
# Produce the basic plot
plot_data <- gg_vistime(data = data_2,
                        col.group = "Sample_ID", # Each row will be a sample_ID - proxy for unique star
                        col.event = "Disease_Sign", # Rows will be coloured by the disease-sign
                        show_labels = FALSE, # Remove labels indicating the disease sign
                        axis.ticks.x = FALSE,
                        axis.ticks.y = FALSE)
plot_data
```

https://stackoverflow.com/questions/40598672/adding-two-y-axis-titles-on-the-same-axis
```{r}
# Tweak the plot
plot_data <- plot_data + theme_bw() +
  ggplot2::theme(
    plot.title = element_text(size=24),
    axis.text.x = element_text(size = 12, color = "black", angle = 30, vjust = 1, hjust = 1),
    axis.text.y = element_text(size = 12, color = "black")) +
  scale_x_datetime(breaks = breaks_width("1 days"), labels = date_format("%b %d"))
plot_data
```

```{r}
# Adding date of sample - based on dates that can be found in the object "sample_dates" we made above
plot_data <- plot_data +
  annotate("point", x = as.POSIXct(sample_dates[13,2]), y = 31, size = 1, colour = "black") + #78
  annotate("point", x = as.POSIXct(sample_dates[11,2]), y = 29, size = 1, colour = "black") + #75
  annotate("point", x = as.POSIXct(sample_dates[6,2]), y = 27, size = 1, colour = "black") + #71
  annotate("point", x = as.POSIXct(sample_dates[7,2]), y = 25, size = 1, colour = "black") + #67
  annotate("point", x = as.POSIXct(sample_dates[1,2]), y = 23, size = 1, colour = "black") + #59
  annotate("point", x = as.POSIXct(sample_dates[8,2]), y = 21, size = 1, colour = "black") + #69
  annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 19, size = 1, colour = "black") + #57
  annotate("point", x = as.POSIXct(sample_dates[15,2]), y = 17, size = 1, colour = "black") + #83
  annotate("point", x = as.POSIXct(sample_dates[3,2]), y = 15, size = 1, colour = "black") + #56
  annotate("point", x = as.POSIXct(sample_dates[16,2]), y = 13, size = 1, colour = "black") + #81
  annotate("point", x = as.POSIXct(sample_dates[14,2]), y = 11, size = 1, colour = "black") + #76
  annotate("point", x = as.POSIXct(sample_dates[4,2]), y = 9, size = 1, colour = "black") + #52
  annotate("point", x = as.POSIXct(sample_dates[9,2]), y = 7, size = 1, colour = "black") + #61
  annotate("point", x = as.POSIXct(sample_dates[5,2]), y = 5, size =1, colour = "black") + #54
  annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 3, size =1, colour = "black") + #73
  annotate("point", x = as.POSIXct(sample_dates[10,2]), y = 1, size =1, colour = "black")  #64
plot_data
```

```{r}
### Create a legend
data_legend <- data_2 %>%
  distinct(Disease_Sign, .keep_all=T) %>%
  arrange(Disease_Sign)
data_legend$start <- as.Date("2021-10-05")
data_legend$end <- as.Date("2021-10-18")
data_legend$Sample_ID <- "Key"
data_legend
plot_legend <- gg_vistime(data = data_legend,
                          col.group = "Sample_ID",
                          col.event = "Disease_Sign",
                          show_labels = TRUE,
                          linewidth = 10)
plot_legend
```


```{r}
# Tweak the legend plot
plot_legend <- plot_legend + theme_void() +
  ggplot2::theme(
    plot.title = element_text(size=11),
    axis.title.x=element_blank(),
    axis.text.x=element_blank(),
    axis.ticks.x=element_blank(),
    axis.title.y=element_blank(),
    axis.text.y=element_blank(),
    axis.ticks.y=element_blank())
plot_legend
```

```{r}
### Combine the main plot and legend into a single figure
plot_combined <- plot_grid(plot_data, plot_legend,
                           rel_widths = c(1, 0.15))
plot_combined
```

```{r}
### Save plot
ggplot2::ggsave(plot_combined, file = "../analyses/26-visualizations/ExpA_disease-progression-phenotype-timeline.pdf", dpi=300, height=4, width=7, units="in")
```

# Disease Progression Timeline for Experiment B 

```{r}
### Load packages
library("vistime")
library("tidyverse")
library("RColorBrewer")
library("scales")
library("cowplot")
```

load in the data
```{r}
datab <- read.csv("../analyses/26-visualizations/expB-phenotype-data-for-figure.csv")
head(datab)
```

```{r}
### Preparing for plot
## Colours
# Define number of colours needed
cols_n <- as.numeric(n_distinct(datab$Disease_Sign))
# Check we have enough colours
ifelse(cols_n>12, 
       "More than 12 wards - not enough colours!",
       "12 wards or fewer - using Set3 colours")
# Select the colours from Set3 palette in RColorBrewer
cols_to_use <- brewer.pal(n = cols_n, name = "Set3")
```
```{r}
# Create mapping of colours to wards
col_sign_mapping <- data.frame(Disease_Sign=unique(c(as.character(datab$Disease_Sign))), color=cols_to_use)
# merge in the mapping to the df
datab_2 <- merge(datab,
                      col_sign_mapping,
                      by="Disease_Sign",
                      all.x=T,all.y=T) %>%
  select(Sample_ID, Sample_Date, Disease_Sign, start, end, color, Treatment)
datab_2
```
```{r}
## Extract sample dates
sample_dates <- datab_2 %>%
  select(Sample_ID, Sample_Date) %>%
  distinct(Sample_ID, .keep_all=TRUE) %>%
  arrange(Sample_Date)
```


```{r}
### Plotting
# Produce the basic plot
plot_data <- gg_vistime(data = datab_2,
                        col.group = "Sample_ID", # Each row will be a sample_ID - proxy for unique star
                        col.event = "Disease_Sign", # Rows will be coloured by the disease-sign
                        show_labels = FALSE, # Remove labels indicating the disease sign
                        axis.ticks.x = FALSE,
                        axis.ticks.y = FALSE)
plot_data
```


```{r}
# Tweak the plot
plot_data <- plot_data + theme_bw() +
  ggplot2::theme(
    plot.title = element_text(size=24),
    axis.text.x = element_text(size = 12, color = "black", angle = 30, vjust = 1, hjust = 1),
    axis.text.y = element_text(size = 12, color = "black")) +
  scale_x_datetime(breaks = breaks_width("1 days"), labels = date_format("%b %d"))
plot_data
```


```{r}
# Adding date of sample - based on dates that can be found in the object "sample_dates" we made above
plot_data <- plot_data +
  annotate("point", x = as.POSIXct(sample_dates[1,2]), y = 23, size = 1, colour = "black") + #174
  annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 21, size = 1, colour = "black") + #190
  annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 19, size = 1, colour = "black") + #231
  annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 17, size = 1, colour = "black") + #187
  annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 15, size = 1, colour = "black") + #188
  annotate("point", x = as.POSIXct(sample_dates[9,2]), y = 13, size = 1, colour = "black") + #228
  annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 11, size = 1, colour = "black") + #186
  annotate("point", x = as.POSIXct(sample_dates[5,2]), y = 9, size = 1, colour = "black") + #203
  annotate("point", x = as.POSIXct(sample_dates[8,2]), y = 7, size = 1, colour = "black") + #209
  annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 5, size =1, colour = "black") + #177
  annotate("point", x = as.POSIXct(sample_dates[10,2]), y = 3, size =1, colour = "black") + #219
  annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 1, size =1, colour = "black")  #230
plot_data
```

```{r}
### Create a legend
data_legend <- datab_2 %>%
  distinct(Disease_Sign, .keep_all=T) %>%
  arrange(Disease_Sign)
data_legend$start <- as.Date("2021-10-05")
data_legend$end <- as.Date("2021-10-18")
data_legend$Sample_ID <- "Key"
data_legend
plot_legend <- gg_vistime(data = data_legend,
                          col.group = "Sample_ID",
                          col.event = "Disease_Sign",
                          show_labels = TRUE,
                          linewidth = 10)
plot_legend
```


```{r}
# Tweak the legend plot
plot_legend <- plot_legend + theme_void() +
  ggplot2::theme(
    plot.title = element_text(size=11),
    axis.title.x=element_blank(),
    axis.text.x=element_blank(),
    axis.ticks.x=element_blank(),
    axis.title.y=element_blank(),
    axis.text.y=element_blank(),
    axis.ticks.y=element_blank())
plot_legend
```

```{r}
### Combine the main plot and legend into a single figure
plot_combined <- plot_grid(plot_data, plot_legend,
                           rel_widths = c(1, 0.15))
plot_combined
```

```{r}
### Save plot
ggplot2::ggsave(plot_combined, file = "../analyses/26-visualizations/ExpB_disease-progression-phenotype-timeline.pdf", dpi=300, height=4, width=7, units="in")
```


---

# Make a PCA of RNAseq data for both experiments combined, pre-DEG analyses
Color code by Experiment (A 2021; B 2022) and exposed and control, and also add in juvenile/adult?? make an option with/without the age class for Exp B 

Read in count matrices 
```{r}
expAcounts <- read.csv("../data/gene_count_matrix_2021.csv")
head(expAcounts)
```

```{r}
expBcounts <- read.csv("../data/gene_count_matrix_2022.csv")
head(expBcounts)
```

combine the two tables: 
```{r}
expABcounts <- left_join(expAcounts, expBcounts, by="gene_id")
head(expABcounts)
```
only keep libraries that we did comparisons for: 
```{r}
expABcounts_sm <- select(expABcounts, "gene_id", "PSC.56", "PSC.52", "PSC.54", "PSC.61", "PSC.64", "PSC.73", "PSC.76", "PSC.81", "PSC.59", "PSC.57", "PSC.69", "PSC.67", "PSC.71", "PSC.75", "PSC.78", "PSC.83", "PSC.0228", "PSC.0187", "PSC.0188", "PSC.0174", "PSC.0190", "PSC.0231", "PSC.0230", "PSC.0219", "PSC.0177", "PSC.0186", "PSC.0209", "PSC.0203")
```


Make a PCA:
```{r}
#install.packages("ggfortify")
```


```{r}
library(ggfortify)
library(plotly)
```


```{r}
expABcounts_transpose = t(expABcounts_sm)
```

make first row into column names: 
```{r}
colnames(expABcounts_transpose)=expABcounts_transpose[c(1),]
```

remove first row: 
```{r}
expABcounts_transpose=expABcounts_transpose[-c(1), ]
```

make it a dataframe
```{r}
expABcounts_transpose <- as.data.frame(expABcounts_transpose)
```


make rownames a column called SampleID
```{r}
library(tibble)
expABcounts_transpose <- tibble::rownames_to_column(expABcounts_transpose, "Sample_ID")
```

make the sample_ID into rownames:
```{r}
#expABcounts_transpose <- data.frame(expABcounts_transpose, row.names = 1)
```


```{r}
library(dplyr)
expABcounts_transpose <- expABcounts_transpose %>% mutate_at(2:23465, as.numeric)
```

change Sample_ID s into descriptions 


add a column that tells the treatment group of each sample: 
```{r}
expABcounts_transpose$Treatment <- c("Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Juvenile", "Exposed_Juvenile", "Exposed_Juvenile", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Control_Juvenile", "Control_Juvenile", "Control_Juvenile", "Control_Adult", "Control_Adult", "Control_Adult")

```

```{r}
expABcounts_trtmnt <- as.data.frame(expABcounts_trtmnt)

```


```{r}
df_nums <- expABcounts_transpose[2:23465]
pca <- prcomp(df_nums)


p <- autoplot(pca, data = expABcounts_transpose, colour = 'Treatment', frame = TRUE)

ggplotly(p)
```

# create a plot showing the average days to death from experiment B comparing adults and juveniles

| star_id | age      | days_to_death |
|---------|----------|---------------|
| 3       | adult    | 12            |
| 5       | adult    | 13            |
| 6       | adult    | 14            |
| 10      | adult    | 13            |
| 13      | adult    | 17            |
| 18      | adult    | 14            |
| 22      | adult    | 14            |
| 28      | adult    | 16            |
| 30      | adult    | 14            |
| 37      | juvenile | 11            |
| 38      | juvenile | 15            |
| 39      | juvenile | 12            |
| 44      | juvenile | 11            |
| 46      | juvenile | 12            |
| 47      | juvenile | 11            |
| 50      | juvenile | 12            |
| 54      | juvenile | 11            |
| 57      | juvenile | 13            |
| 61      | juvenile | 15            |

I want the plot to have the y-axis be days to death   
X -axis is two groups: juveniles and adults    
the plot will show two points -- one for juveniles at the average day to death with error bars, and one for adults with average day to death with error bars 

https://statdoe.com/step-by-step-scatterplot-for-one-factor-in-r/


```{r}
# loading the appropriate libraries
library(readr)
library(ggplot2)

# loading and checking the data
mortexpb <- read_csv("../analyses/26-visualizations/mortality_expB_data.csv")
print(mortexpb)
```

```{r}
# scatterplot
options(repr.plot.width=2, repr.plot.height=4) 

ggplot(mortexpb, aes(age, average_days_to_death)) + 
  geom_point(size = 5) +
  geom_errorbar(aes(ymin=average_days_to_death-sd, ymax=average_days_to_death+sd), width = 1) +
  labs(x="Age", y="Days To Mortality Post-Exposure") +
  ylim(0, 20) +
  theme_bw(base_size = 18) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
```

```{r}
# saving the final figure
ggsave("../analyses/26-visualizations/ExpB_mort_scatterplot.png", width = 4, height = 2.5, dpi = 1000)
```


## Make a PCA for both experiments' raw RNAseq data, grouping by treatments for all experiments 
Read in count matrices 
```{r}
expAcounts <- read.csv("../data/gene_count_matrix_2021.csv")
head(expAcounts)
```

```{r}
expBcounts <- read.csv("../data/gene_count_matrix_2022.csv")
head(expBcounts)
```

combine the two tables: 
```{r}
expABcounts <- left_join(expAcounts, expBcounts, by="gene_id")
head(expABcounts)
```
only keep libraries that we did comparisons for: 
```{r}
expABcounts_sm <- select(expABcounts, "gene_id", "PSC.56", "PSC.52", "PSC.54", "PSC.61", "PSC.64", "PSC.73", "PSC.76", "PSC.81", "PSC.59", "PSC.57", "PSC.69", "PSC.67", "PSC.71", "PSC.75", "PSC.78", "PSC.83", "PSC.0228", "PSC.0187", "PSC.0188", "PSC.0174", "PSC.0190", "PSC.0231", "PSC.0230", "PSC.0219", "PSC.0177", "PSC.0186", "PSC.0209", "PSC.0203")
```

Make a PCA:
```{r}
#install.packages("ggfortify")
```


```{r}
library(ggfortify)
library(plotly)
```


```{r}
expABcounts_transpose = t(expABcounts_sm)
```

make first row into column names: 
```{r}
colnames(expABcounts_transpose)=expABcounts_transpose[c(1),]
```

remove first row: 
```{r}
expABcounts_transpose=expABcounts_transpose[-c(1), ]
```

make it a dataframe
```{r}
expABcounts_transpose <- as.data.frame(expABcounts_transpose)
```


make rownames a column called SampleID
```{r}
library(tibble)
expABcounts_transpose <- tibble::rownames_to_column(expABcounts_transpose, "Sample_ID")
```

make the sample_ID into rownames:
```{r}
#expABcounts_transpose <- data.frame(expABcounts_transpose, row.names = 1)
```


```{r}
library(dplyr)
expABcounts_transpose <- expABcounts_transpose %>% mutate_at(2:23465, as.numeric)
```

change Sample_ID s into descriptions 


add a column that tells the treatment group of each sample: 
```{r}
expABcounts_transpose$Treatment <- c("Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge")

```

```{r}
expABcounts_transpose <- as.data.frame(expABcounts_tra)

```


```{r}
df_nums <- expABcounts_transpose[2:23465]
pca <- prcomp(df_nums)


p <- autoplot(pca, data = expABcounts_transpose, colour = 'Treatment', frame = TRUE)

ggplotly(p)
```