---
title: "72 exon data rfmt"
author: Steven Roberts
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
  html_document:
    theme: readable
    highlight: zenburn
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
  # github_document:
  #   toc: true
  #   toc_depth: 3
  #   number_sections: true
  #   html_preview: true
---

```{r setup, include=FALSE}
#library(kableExtra)
# library(DESeq2)
# library(pheatmap)
# library(RColorBrewer)
# library(data.table)
#library(DT)
# library(Biostrings)
#library(methylKit)
library(tidyverse)
knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = TRUE,         # Evaluate code chunks
  warning = TRUE,      # Show warnings
  message = TRUE,      # Show messages
  fig.width = 6,       # Set plot width in inches
  fig.height = 4,      # Set plot height in inches
  fig.align = "center" # Align plots to the center
)
```

We have fold exon expression for all samples

# Metadata

| Sample.ID | OldSample.ID | Treatment | Sex | TreatmentN | Parent.ID |
|-----------|--------------|-----------|-----|------------|-----------|
| 12M | S12M | Exposed | M | 3 | EM05 |
| 13M | S13M | Control | M | 1 | CM04 |
| 16F | S16F | Control | F | 2 | CF05 |
| 19F | S19F | Control | F | 2 | CF08 |
| 22F | S22F | Exposed | F | 4 | EF02 |
| 23M | S23M | Exposed | M | 3 | EM04 |
| 29F | S29F | Exposed | F | 4 | EF07 |
| 31M | S31M | Exposed | M | 3 | EM06 |
| 35F | S35F | Exposed | F | 4 | EF08 |
| 36F | S36F | Exposed | F | 4 | EF05 |
| 39F | S39F | Control | F | 2 | CF06 |
| 3F | S3F | Exposed | F | 4 | EF06 |
| 41F | S41F | Exposed | F | 4 | EF03 |
| 44F | S44F | Control | F | 2 | CF03 |
| 48M | S48M | Exposed | M | 3 | EM03 |
| 50F | S50F | Exposed | F | 4 | EF01 |
| 52F | S52F | Control | F | 2 | CF07 |
| 53F | S53F | Control | F | 2 | CF02 |
| 54F | S54F | Control | F | 2 | CF01 |
| 59M | S59M | Exposed | M | 3 | EM01 |
| 64M | S64M | Control | M | 1 | CM05 |
| 6M | S6M | Control | M | 1 | CM02 |
| 76F | S76F | Control | F | 2 | CF04 |
| 77F | S77F | Exposed | F | 4 | EF04 |
| 7M | S7M | Control | M | 1 | CM01 |
| 9M | S9M | Exposed | M | 3 | EM02 |

```
all <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S64M", "S6M", "S76F", "S7M", "S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M")
controls <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S64M", "S6M", "S76F", "S7M")
exposed <- c("S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M")
females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F", "S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F")
males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M","S13M", "S64M", "S6M", "S7M")
controls_males <- c("S13M", "S64M", "S6M", "S7M")
exposed_males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M")
controls_females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F")
exposed_females <- c("S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F")
```

# Loading log fold

```{r}
# Sample IDs in the order their files are read. This order fixes the element
# order of `logdata_frames` and therefore the row order of every table that
# is built from it further down.
sample_ids <- c(
  "S12M", "S13M", "S16F", "S19F", "S22F", "S23M", "S29F", "S31M", "S35F",
  "S36F", "S39F", "S3F", "S41F", "S44F", "S48M", "S50F", "S52F", "S53F",
  "S54F", "S59M", "S64M", "S6M", "S76F", "S77F", "S7M", "S9M"
)

# One log-fold CSV per sample, e.g.
# ../output/65-exon-coverage/S12M_exonsum_10_logfold.csv
logfile_paths <- paste0(
  "../output/65-exon-coverage/", sample_ids, "_exonsum_10_logfold.csv"
)

# Fail early, listing every missing input at once, instead of stopping on the
# first unreadable file part-way through the import.
missing_files <- logfile_paths[!file.exists(logfile_paths)]
if (length(missing_files) > 0) {
  stop(
    "Missing log-fold input file(s): ",
    paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

# Read every file into a list of data frames keyed by sample ID.
logdata_frames <- lapply(setNames(logfile_paths, sample_ids), read.csv)

# At this point, `logdata_frames` is a named list of data frames.
# Each data frame can be accessed by its sample ID, for example:
# logdata_frames$S12M  # contents of S12M_exonsum_10_logfold.csv
```

```{r}
head(logdata_frames$S12M)
```

```{r}
# Stack all samples into one table. The list names become the `SampleID`
# column, and `Sex` is taken from the last character of the sample ID
# (the IDs used here end in "M" or "F").
combined_exon_df <- bind_rows(logdata_frames, .id = "SampleID") %>%
  select(SampleID, GeneID, starts_with("fold")) %>%
  mutate(Sex = substr(SampleID, nchar(SampleID), nchar(SampleID)))
```

```{r}
# Directory that receives every CSV written by this document; create it if it
# is not there yet so that write.csv() cannot fail on a missing folder.
output_dir <- "../output/72-exon-data-rfmt"
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

# Split the combined table by sex and drop every row that contains an NA.
male_exon_df <- combined_exon_df %>%
  filter(Sex == "M") %>%
  na.omit()

# View() opens a data viewer and is only meaningful in an interactive
# session, so it is skipped when the document is knitted.
if (interactive()) {
  View(male_exon_df)
}

female_exon_df <- combined_exon_df %>%
  filter(Sex == "F") %>%
  na.omit()

# Write the per-sex data frames to CSV files
write.csv(
  male_exon_df,
  file.path(output_dir, "male_exon_df.csv"),
  row.names = FALSE
)
write.csv(
  female_exon_df,
  file.path(output_dir, "female_exon_df.csv"),
  row.names = FALSE
)
```

# reformatting to look like

```
x             gene01  gene02  gene03
sample_fold1
sample_fold2  x       x
sample2_fold
```

```{r}

```

```{r}
# Reshape a per-sex exon table into one row per sample/fold combination and
# one column per gene.
#
# exon_df: data frame with columns `SampleID`, `GeneID`, `Sex` and one or more
#   columns whose names start with "fold".
# Returns: a table with a `SampleID_fold` key column ("<SampleID>_<fold
#   column name>") followed by one column per `GeneID` value. Rows whose key
#   ends in "fold1" are removed.
#   NOTE(review): presumably fold1 is dropped because it is the reference
#   exon - confirm.
reshape_exon_folds <- function(exon_df) {
  exon_df %>%
    pivot_longer(
      cols = starts_with("fold"),
      names_to = "fold_number",
      values_to = "fold_value"
    ) %>%
    unite("SampleID_fold", SampleID, fold_number, sep = "_") %>%
    pivot_wider(names_from = GeneID, values_from = fold_value) %>%
    select(-Sex) %>%
    filter(!str_detect(SampleID_fold, "fold1$"))
}

me_transformed <- reshape_exon_folds(male_exon_df)
```

```{r}
fe_transformed <- reshape_exon_folds(female_exon_df)
```

```{r}
# Write the reshaped (sample/fold x gene) tables to CSV files
write.csv(
  me_transformed,
  file.path(output_dir, "male_exon_tf.csv"),
  row.names = FALSE
)
write.csv(
  fe_transformed,
  file.path(output_dir, "female_exon_tf.csv"),
  row.names = FALSE
)
```