---
title: "01.01-supplemental-tables"
author: "Sam White"
date: "2024-02-27"
output: html_document
---

File for producing miscellaneous supplemental tables.

All outputs can be found in the [`supplemental-files` directory](https://github.com/sr320/ceabigr/tree/main/supplemental-files).

# Load `R` libraries

```{r}
library("tidyverse")
```

# Set variables

## Vectors of experimental groups

```{r set-experimental-groups}
# Vectors for subsetting samples by different groups.
# Sample IDs here are matched against the part of each `FPKM.<sample>` column
# name that follows the dot (see `calculate_mean_FPKM()`).
controls <- c(
  "S13M", "S16F", "S19F", "S39F", "S44F", "S52F",
  "S53F", "S54F", "S64M", "S6M", "S76F", "S7M"
)

exposed <- c(
  "S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F",
  "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M"
)

males_controls <- c("S13M", "S64M", "S6M", "S7M")

males_exposed <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M")

females_controls <- c(
  "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F"
)

females_exposed <- c(
  "S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F"
)
```

# Functions

## Calculate mean FPKM for each gene within a group of samples

```{r function-mean-fpkm-per-gene}
# Calculate the mean FPKM of every gene across a set of samples.
#
# Arguments:
#   samples   Character vector of sample IDs to average over. IDs are compared
#             with the text after the dot in each `FPKM.<sample>` column name.
#   gx_table  Data frame with a `name` column and one `FPKM.<sample>` column
#             per sample. Defaults to the `whole_gx_table` object; the default
#             is evaluated lazily, so `whole_gx_table` only needs to exist when
#             the function is called, not when it is defined.
#
# Returns:
#   A tibble with one row per `name` and a `mean_gene_FPKM` column holding the
#   mean (NA values ignored) of the selected samples' FPKM values.
calculate_mean_FPKM <- function(samples, gx_table = whole_gx_table) {
  gx_table %>%
    select(name, starts_with("FPKM.")) %>%
    # Split "FPKM.<sample>" column names into a metric and a sample ID
    pivot_longer(
      cols = -name,
      names_sep = "\\.",
      names_to = c("metric", "sample")
    ) %>%
    filter(sample %in% samples) %>%
    group_by(name) %>%
    summarize(mean_gene_FPKM = mean(value, na.rm = TRUE))
}
```

# Create canonical table max transcripts and predominant isoforms

## Read in predominant isoforms, max transcripts, and gene expression

```{r}
predom_fcoe_mcoe <- read.csv("../output/42-predominant-isoform/predom_iso-FEMALE-and-MALE-controls-exposed.csv")

max_transcripts_fmcoe_mcoe <- read.csv(file = "../output/34-transcript-counts/max.transcripts.females-c-e.males-c-e.csv")

whole_gx_table <- read.csv(file = "../data/whole_gx_table.csv")

str(predom_fcoe_mcoe)

str(max_transcripts_fmcoe_mcoe)

str(whole_gx_table)
```

## Mean gene FPKMs

```{r}
# Calculate mean FPKM for each group.
# The list names become the `Group` values and, later, the column prefixes.
mean_FPKM <- list(
  females_controls = calculate_mean_FPKM(females_controls),
  females_exposed = calculate_mean_FPKM(females_exposed),
  males_controls = calculate_mean_FPKM(males_controls),
  males_exposed = calculate_mean_FPKM(males_exposed)
)

# Combine the results into a single data frame, tagging rows with their group
mean_FPKM_combined <- bind_rows(mean_FPKM, .id = "Group")

# Pivot the data to the desired layout: one row per gene, one column per group
mean_FPKM_final <- mean_FPKM_combined %>%
  pivot_wider(names_from = "Group", values_from = "mean_gene_FPKM") %>%
  mutate(name = as.character(name))

# Rename the columns using the names of the original vectors.
# Relies on pivot_wider() creating the group columns in the same order as the
# elements of `mean_FPKM`; the first column (`name`) is left untouched.
new_colnames <- names(mean_FPKM)
colnames(mean_FPKM_final)[-1] <- paste(
  new_colnames,
  "mean_gene_FPKM",
  sep = "_"
)

mean_FPKM_final <- mean_FPKM_final %>%
  # Remove the first row
  # Summary data row leftover from Ballgown
  # NOTE(review): this drops whichever row comes first after grouping by
  # `name`; assumes that row is the Ballgown summary row - TODO confirm.
  slice(-1) %>%
  # Rename the column 'name' to 'gene_name'
  rename(gene_name = name) %>%
  # Remove the prefix "gene-" from the gene_name column.
  # The pattern is anchored so only a leading "gene-" is stripped; an
  # unanchored pattern would also alter names containing "gene-" elsewhere.
  mutate(gene_name = str_remove(gene_name, "^gene-"))

# View the result
str(mean_FPKM_final)
```

## Join

```{r}
# Add predominant isoforms and per-group mean FPKMs to the max transcripts
# table, keeping every row of the max transcripts table.
max_transcripts_predom_isos <- max_transcripts_fmcoe_mcoe %>%
  left_join(predom_fcoe_mcoe, by = "gene_name") %>%
  left_join(mean_FPKM_final, by = "gene_name")

str(max_transcripts_predom_isos)
```

## Write to file

```{r}
# Write to CSV
# NOTE(review): `quote = FALSE` writes fields unquoted, so any value that
# contains a comma would shift columns in the output - confirm no field does.
write.csv(
  max_transcripts_predom_isos,
  file = "../supplemental-files/01.01-fmcoe-max-predom-isos-gene_fpkm.csv",
  quote = FALSE,
  row.names = FALSE
)
```