--- title: "26-visualizations" output: html_document date: "2024-11-18" --- Rmd to make visualizations for paper. https://yulab-smu.top/biomedical-knowledge-mining-book/enrichplot.html ```{r} library(ggplot2) library(tidyverse) library(dplyr) library(RColorBrewer) ``` # Make a figure of the enriched GO Processes Read in the list of 9 top enriched processes: ```{r} top9 <- read.delim("../analyses/25-compare-2021-2022/2021-2022-entichedtop9_DAVID_separated.txt") head(top9) ``` Goal: plots of the terms, x-axis is counts and color is benjamini value. ```{r} # Basic barplot p <- ggplot(data=top9, aes(x=Fold.Enrichment, y=Term, fill=Count)) + geom_bar(stat="identity")+theme_minimal() p # Rename y-axis: # Horizontal bar plot p + coord_flip() ``` # Make a Venn Diagram of the DEGs for 2021, 2022, and the overlap... is this just numbers?? Total DEGS for 2021 --> 6938 Total amount unique to 2021 --> (6938-4114 =) 2824 Total DEGs for 2022 --> 6237 Total amount unique to 2022 --> (6237-4114 =) 2123 ```{r} #install.packages("ggVennDiagram") library(ggVennDiagram) ``` ```{r} #install.packages("VennDiagram") library(VennDiagram) ``` ```{r} # create pairwise Venn diagram draw.pairwise.venn(area1=6938, area2=6237,cross.area=4114, category=c("A","B"),fill=c("Red","Yellow")) # 2D Venn diagram ggVennDiagram(x) ``` <<<<<<< HEAD # Disease Progression and Mortality figure: Focusing on just the observations for the n=16 stars in 2021 (Exp A) and n=12 stars in 2022 (Exp B) https://cran.r-project.org/web/packages/vistime/vignettes/gg_vistime-vignette.html \ https://shosaco.github.io/vistime/ Goal -- timeline color-coded horizontal stacked bar... ```{r} #install.packages("vistime") #install.packages("cowplot") ``` ```{r} ### Load packages library("vistime") library("tidyverse") library("RColorBrewer") library("scales") library("cowplot") ``` load in data ```{r} data <- read.csv("../analyses/26-visualizations/expA-phenotype-data-for-figure.csv") head(data) ``` https://github.com/wlhamilton/Patient-ward-movement-timelines/blob/main/R%20script%20to%20generate%20example%20ward%20movement%20plot.R ```{r} ### Preparing for plot ## Colours # Define number of colours needed cols_n <- as.numeric(n_distinct(data$Disease_Sign)) # Check we have enough colours ifelse(cols_n>12, "More than 12 wards - not enough colours!", "12 wards or fewer - using Set3 colours") # Select the colours from Set3 palette in RColorBrewer cols_to_use <- brewer.pal(n = cols_n, name = "Set3") ``` ```{r} # Create mapping of colours to wards col_sign_mapping <- data.frame(Disease_Sign=unique(c(as.character(data$Disease_Sign))), color=cols_to_use) # merge in the mapping to the df data_2 <- merge(data, col_sign_mapping, by="Disease_Sign", all.x=T,all.y=T) %>% select(Sample_ID, Sample_Date, Disease_Sign, start, end, color, Treatment) data_2 ``` ```{r} ## Extract sample dates sample_dates <- data_2 %>% select(Sample_ID, Sample_Date) %>% distinct(Sample_ID, .keep_all=TRUE) %>% arrange(Sample_Date) ``` ```{r} ### Plotting # Produce the basic plot plot_data <- gg_vistime(data = data_2, col.group = "Sample_ID", # Each row will be a sample_ID - proxy for unique star col.event = "Disease_Sign", # Rows will be coloured by the disease-sign show_labels = FALSE, # Remove labels indicating the disease sign axis.ticks.x = FALSE, axis.ticks.y = FALSE) plot_data ``` https://stackoverflow.com/questions/40598672/adding-two-y-axis-titles-on-the-same-axis ```{r} # Tweak the plot plot_data <- plot_data + theme_bw() + ggplot2::theme( plot.title = element_text(size=24), axis.text.x = element_text(size = 12, color = "black", angle = 30, vjust = 1, hjust = 1), axis.text.y = element_text(size = 12, color = "black")) + scale_x_datetime(breaks = breaks_width("1 days"), labels = date_format("%b %d")) plot_data ``` ```{r} # Adding date of sample - based on dates that can be found in the object "sample_dates" we made above plot_data <- plot_data + annotate("point", x = as.POSIXct(sample_dates[13,2]), y = 31, size = 1, colour = "black") + #78 annotate("point", x = as.POSIXct(sample_dates[11,2]), y = 29, size = 1, colour = "black") + #75 annotate("point", x = as.POSIXct(sample_dates[6,2]), y = 27, size = 1, colour = "black") + #71 annotate("point", x = as.POSIXct(sample_dates[7,2]), y = 25, size = 1, colour = "black") + #67 annotate("point", x = as.POSIXct(sample_dates[1,2]), y = 23, size = 1, colour = "black") + #59 annotate("point", x = as.POSIXct(sample_dates[8,2]), y = 21, size = 1, colour = "black") + #69 annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 19, size = 1, colour = "black") + #57 annotate("point", x = as.POSIXct(sample_dates[15,2]), y = 17, size = 1, colour = "black") + #83 annotate("point", x = as.POSIXct(sample_dates[3,2]), y = 15, size = 1, colour = "black") + #56 annotate("point", x = as.POSIXct(sample_dates[16,2]), y = 13, size = 1, colour = "black") + #81 annotate("point", x = as.POSIXct(sample_dates[14,2]), y = 11, size = 1, colour = "black") + #76 annotate("point", x = as.POSIXct(sample_dates[4,2]), y = 9, size = 1, colour = "black") + #52 annotate("point", x = as.POSIXct(sample_dates[9,2]), y = 7, size = 1, colour = "black") + #61 annotate("point", x = as.POSIXct(sample_dates[5,2]), y = 5, size =1, colour = "black") + #54 annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 3, size =1, colour = "black") + #73 annotate("point", x = as.POSIXct(sample_dates[10,2]), y = 1, size =1, colour = "black") #64 plot_data ``` ```{r} ### Create a legend data_legend <- data_2 %>% distinct(Disease_Sign, .keep_all=T) %>% arrange(Disease_Sign) data_legend$start <- as.Date("2021-10-05") data_legend$end <- as.Date("2021-10-18") data_legend$Sample_ID <- "Key" data_legend plot_legend <- gg_vistime(data = data_legend, col.group = "Sample_ID", col.event = "Disease_Sign", show_labels = TRUE, linewidth = 10) plot_legend ``` ```{r} # Tweak the legend plot plot_legend <- plot_legend + theme_void() + ggplot2::theme( plot.title = element_text(size=11), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) plot_legend ``` ```{r} ### Combine the main plot and legend into a single figure plot_combined <- plot_grid(plot_data, plot_legend, rel_widths = c(1, 0.15)) plot_combined ``` ```{r} ### Save plot ggplot2::ggsave(plot_combined, file = "../analyses/26-visualizations/ExpA_disease-progression-phenotype-timeline.pdf", dpi=300, height=4, width=7, units="in") ``` # Disease Progression Timeline for Experiment B ```{r} ### Load packages library("vistime") library("tidyverse") library("RColorBrewer") library("scales") library("cowplot") ``` load in the data ```{r} datab <- read.csv("../analyses/26-visualizations/expB-phenotype-data-for-figure.csv") head(datab) ``` ```{r} ### Preparing for plot ## Colours # Define number of colours needed cols_n <- as.numeric(n_distinct(datab$Disease_Sign)) # Check we have enough colours ifelse(cols_n>12, "More than 12 wards - not enough colours!", "12 wards or fewer - using Set3 colours") # Select the colours from Set3 palette in RColorBrewer cols_to_use <- brewer.pal(n = cols_n, name = "Set3") ``` ```{r} # Create mapping of colours to wards col_sign_mapping <- data.frame(Disease_Sign=unique(c(as.character(datab$Disease_Sign))), color=cols_to_use) # merge in the mapping to the df datab_2 <- merge(datab, col_sign_mapping, by="Disease_Sign", all.x=T,all.y=T) %>% select(Sample_ID, Sample_Date, Disease_Sign, start, end, color, Treatment) datab_2 ``` ```{r} ## Extract sample dates sample_dates <- datab_2 %>% select(Sample_ID, Sample_Date) %>% distinct(Sample_ID, .keep_all=TRUE) %>% arrange(Sample_Date) ``` ```{r} ### Plotting # Produce the basic plot plot_data <- gg_vistime(data = datab_2, col.group = "Sample_ID", # Each row will be a sample_ID - proxy for unique star col.event = "Disease_Sign", # Rows will be coloured by the disease-sign show_labels = FALSE, # Remove labels indicating the disease sign axis.ticks.x = FALSE, axis.ticks.y = FALSE) plot_data ``` ```{r} # Tweak the plot plot_data <- plot_data + theme_bw() + ggplot2::theme( plot.title = element_text(size=24), axis.text.x = element_text(size = 12, color = "black", angle = 30, vjust = 1, hjust = 1), axis.text.y = element_text(size = 12, color = "black")) + scale_x_datetime(breaks = breaks_width("1 days"), labels = date_format("%b %d")) plot_data ``` ```{r} # Adding date of sample - based on dates that can be found in the object "sample_dates" we made above plot_data <- plot_data + annotate("point", x = as.POSIXct(sample_dates[1,2]), y = 23, size = 1, colour = "black") + #174 annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 21, size = 1, colour = "black") + #190 annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 19, size = 1, colour = "black") + #231 annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 17, size = 1, colour = "black") + #187 annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 15, size = 1, colour = "black") + #188 annotate("point", x = as.POSIXct(sample_dates[9,2]), y = 13, size = 1, colour = "black") + #228 annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 11, size = 1, colour = "black") + #186 annotate("point", x = as.POSIXct(sample_dates[5,2]), y = 9, size = 1, colour = "black") + #203 annotate("point", x = as.POSIXct(sample_dates[8,2]), y = 7, size = 1, colour = "black") + #209 annotate("point", x = as.POSIXct(sample_dates[2,2]), y = 5, size =1, colour = "black") + #177 annotate("point", x = as.POSIXct(sample_dates[10,2]), y = 3, size =1, colour = "black") + #219 annotate("point", x = as.POSIXct(sample_dates[12,2]), y = 1, size =1, colour = "black") #230 plot_data ``` ```{r} ### Create a legend data_legend <- datab_2 %>% distinct(Disease_Sign, .keep_all=T) %>% arrange(Disease_Sign) data_legend$start <- as.Date("2021-10-05") data_legend$end <- as.Date("2021-10-18") data_legend$Sample_ID <- "Key" data_legend plot_legend <- gg_vistime(data = data_legend, col.group = "Sample_ID", col.event = "Disease_Sign", show_labels = TRUE, linewidth = 10) plot_legend ``` ```{r} # Tweak the legend plot plot_legend <- plot_legend + theme_void() + ggplot2::theme( plot.title = element_text(size=11), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) plot_legend ``` ```{r} ### Combine the main plot and legend into a single figure plot_combined <- plot_grid(plot_data, plot_legend, rel_widths = c(1, 0.15)) plot_combined ``` ```{r} ### Save plot ggplot2::ggsave(plot_combined, file = "../analyses/26-visualizations/ExpB_disease-progression-phenotype-timeline.pdf", dpi=300, height=4, width=7, units="in") ``` --- # Make a PCA of RNAseq data for both experiments combined, pre-DEG analyses Color code by Experiment (A 2021; B 2022) and exposed and control, and also add in juvenile/adult?? make an option with/without the age class for Exp B Read in count matrices ```{r} expAcounts <- read.csv("../data/gene_count_matrix_2021.csv") head(expAcounts) ``` ```{r} expBcounts <- read.csv("../data/gene_count_matrix_2022.csv") head(expBcounts) ``` combine the two tables: ```{r} expABcounts <- left_join(expAcounts, expBcounts, by="gene_id") head(expABcounts) ``` only keep libraries that we did comparisons for: ```{r} expABcounts_sm <- select(expABcounts, "gene_id", "PSC.56", "PSC.52", "PSC.54", "PSC.61", "PSC.64", "PSC.73", "PSC.76", "PSC.81", "PSC.59", "PSC.57", "PSC.69", "PSC.67", "PSC.71", "PSC.75", "PSC.78", "PSC.83", "PSC.0228", "PSC.0187", "PSC.0188", "PSC.0174", "PSC.0190", "PSC.0231", "PSC.0230", "PSC.0219", "PSC.0177", "PSC.0186", "PSC.0209", "PSC.0203") ``` Make a PCA: ```{r} #install.packages("ggfortify") ``` ```{r} library(ggfortify) library(plotly) ``` ```{r} expABcounts_transpose = t(expABcounts_sm) ``` make first row into column names: ```{r} colnames(expABcounts_transpose)=expABcounts_transpose[c(1),] ``` remove first row: ```{r} expABcounts_transpose=expABcounts_transpose[-c(1), ] ``` make it a dataframe ```{r} expABcounts_transpose <- as.data.frame(expABcounts_transpose) ``` make rownames a column called SampleID ```{r} library(tibble) expABcounts_transpose <- tibble::rownames_to_column(expABcounts_transpose, "Sample_ID") ``` make the sample_ID into rownames: ```{r} #expABcounts_transpose <- data.frame(expABcounts_transpose, row.names = 1) ``` ```{r} library(dplyr) expABcounts_transpose <- expABcounts_transpose %>% mutate_at(2:23465, as.numeric) ``` change Sample_ID s into descriptions add a column that tells the treatment group of each sample: ```{r} expABcounts_transpose$Treatment <- c("Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Control_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Exposed_Juvenile", "Exposed_Juvenile", "Exposed_Juvenile", "Exposed_Adult", "Exposed_Adult", "Exposed_Adult", "Control_Juvenile", "Control_Juvenile", "Control_Juvenile", "Control_Adult", "Control_Adult", "Control_Adult") ``` ```{r} expABcounts_trtmnt <- as.data.frame(expABcounts_trtmnt) ``` ```{r} df_nums <- expABcounts_transpose[2:23465] pca <- prcomp(df_nums) p <- autoplot(pca, data = expABcounts_transpose, colour = 'Treatment', frame = TRUE) ggplotly(p) ``` # create a plot showing the average days to death from experiment B comparing adults and juveniles | star_id | age | days_to_death | |---------|----------|---------------| | 3 | adult | 12 | | 5 | adult | 13 | | 6 | adult | 14 | | 10 | adult | 13 | | 13 | adult | 17 | | 18 | adult | 14 | | 22 | adult | 14 | | 28 | adult | 16 | | 30 | adult | 14 | | 37 | juvenile | 11 | | 38 | juvenile | 15 | | 39 | juvenile | 12 | | 44 | juvenile | 11 | | 46 | juvenile | 12 | | 47 | juvenile | 11 | | 50 | juvenile | 12 | | 54 | juvenile | 11 | | 57 | juvenile | 13 | | 61 | juvenile | 15 | I want the plot to have the y-axis be days to death X -axis is two groups: juveniles and adults the plot will show two points -- one for juveniles at the average day to death with error bars, and one for adults with average day to death with error bars https://statdoe.com/step-by-step-scatterplot-for-one-factor-in-r/ ```{r} # loading the appropriate libraries library(readr) library(ggplot2) # loading and checking the data mortexpb <- read_csv("../analyses/26-visualizations/mortality_expB_data.csv") print(mortexpb) ``` ```{r} # scatterplot options(repr.plot.width=2, repr.plot.height=4) ggplot(mortexpb, aes(age, average_days_to_death)) + geom_point(size = 5) + geom_errorbar(aes(ymin=average_days_to_death-sd, ymax=average_days_to_death+sd), width = 1) + labs(x="Age", y="Days To Mortality Post-Exposure") + ylim(0, 20) + theme_bw(base_size = 18) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) ``` ```{r} # saving the final figure ggsave("../analyses/26-visualizations/ExpB_mort_scatterplot.png", width = 4, height = 2.5, dpi = 1000) ``` ## Make a PCA for both experiments' raw RNAseq data, grouping by treatments for all experiments Read in count matrices ```{r} expAcounts <- read.csv("../data/gene_count_matrix_2021.csv") head(expAcounts) ``` ```{r} expBcounts <- read.csv("../data/gene_count_matrix_2022.csv") head(expBcounts) ``` combine the two tables: ```{r} expABcounts <- left_join(expAcounts, expBcounts, by="gene_id") head(expABcounts) ``` only keep libraries that we did comparisons for: ```{r} expABcounts_sm <- select(expABcounts, "gene_id", "PSC.56", "PSC.52", "PSC.54", "PSC.61", "PSC.64", "PSC.73", "PSC.76", "PSC.81", "PSC.59", "PSC.57", "PSC.69", "PSC.67", "PSC.71", "PSC.75", "PSC.78", "PSC.83", "PSC.0228", "PSC.0187", "PSC.0188", "PSC.0174", "PSC.0190", "PSC.0231", "PSC.0230", "PSC.0219", "PSC.0177", "PSC.0186", "PSC.0209", "PSC.0203") ``` Make a PCA: ```{r} #install.packages("ggfortify") ``` ```{r} library(ggfortify) library(plotly) ``` ```{r} expABcounts_transpose = t(expABcounts_sm) ``` make first row into column names: ```{r} colnames(expABcounts_transpose)=expABcounts_transpose[c(1),] ``` remove first row: ```{r} expABcounts_transpose=expABcounts_transpose[-c(1), ] ``` make it a dataframe ```{r} expABcounts_transpose <- as.data.frame(expABcounts_transpose) ``` make rownames a column called SampleID ```{r} library(tibble) expABcounts_transpose <- tibble::rownames_to_column(expABcounts_transpose, "Sample_ID") ``` make the sample_ID into rownames: ```{r} #expABcounts_transpose <- data.frame(expABcounts_transpose, row.names = 1) ``` ```{r} library(dplyr) expABcounts_transpose <- expABcounts_transpose %>% mutate_at(2:23465, as.numeric) ``` change Sample_ID s into descriptions add a column that tells the treatment group of each sample: ```{r} expABcounts_transpose$Treatment <- c("Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Control_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Adult_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Juvenile_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Exposed_Adult_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Juvenile_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge", "Control_Adult_AgeClass_SSWD_Challenge") ``` ```{r} expABcounts_transpose <- as.data.frame(expABcounts_tra) ``` ```{r} df_nums <- expABcounts_transpose[2:23465] pca <- prcomp(df_nums) p <- autoplot(pca, data = expABcounts_transpose, colour = 'Treatment', frame = TRUE) ggplotly(p) ```