---
title: "72 exon data rfmt"
author: Steven Roberts
date: "`r format(Sys.time(), '%d %B, %Y')`" 
output: 
  html_document:
    theme: readable
    highlight: zenburn
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
  # github_document:
  #   toc: true
  #   toc_depth: 3
  #   number_sections: true
  #   html_preview: true
---

```{r setup, include=FALSE}
#library(kableExtra)
# library(DESeq2)
# library(pheatmap)
# library(RColorBrewer)
# library(data.table)
#library(DT)
# library(Biostrings)
#library(methylKit)
library(tidyverse)

# Global knitr defaults applied to every chunk in this document.
knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = TRUE,         # Evaluate code chunks
  warning = TRUE,     # Show warnings in the rendered output
  message = TRUE,     # Show messages in the rendered output
  fig.width = 6,       # Set plot width in inches
  fig.height = 4,      # Set plot height in inches
  fig.align = "center" # Align plots to the center
)
```

We have exon-level log-fold expression data for all samples.


# Metadata

| Sample.ID | OldSample.ID | Treatment | Sex | TreatmentN | Parent.ID |
|-----------|--------------|-----------|-----|------------|-----------|
| 12M       | S12M         | Exposed   | M   | 3          | EM05      |
| 13M       | S13M         | Control   | M   | 1          | CM04      |
| 16F       | S16F         | Control   | F   | 2          | CF05      |
| 19F       | S19F         | Control   | F   | 2          | CF08      |
| 22F       | S22F         | Exposed   | F   | 4          | EF02      |
| 23M       | S23M         | Exposed   | M   | 3          | EM04      |
| 29F       | S29F         | Exposed   | F   | 4          | EF07      |
| 31M       | S31M         | Exposed   | M   | 3          | EM06      |
| 35F       | S35F         | Exposed   | F   | 4          | EF08      |
| 36F       | S36F         | Exposed   | F   | 4          | EF05      |
| 39F       | S39F         | Control   | F   | 2          | CF06      |
| 3F        | S3F          | Exposed   | F   | 4          | EF06      |
| 41F       | S41F         | Exposed   | F   | 4          | EF03      |
| 44F       | S44F         | Control   | F   | 2          | CF03      |
| 48M       | S48M         | Exposed   | M   | 3          | EM03      |
| 50F       | S50F         | Exposed   | F   | 4          | EF01      |
| 52F       | S52F         | Control   | F   | 2          | CF07      |
| 53F       | S53F         | Control   | F   | 2          | CF02      |
| 54F       | S54F         | Control   | F   | 2          | CF01      |
| 59M       | S59M         | Exposed   | M   | 3          | EM01      |
| 64M       | S64M         | Control   | M   | 1          | CM05      |
| 6M        | S6M          | Control   | M   | 1          | CM02      |
| 76F       | S76F         | Control   | F   | 2          | CF04      |
| 77F       | S77F         | Exposed   | F   | 4          | EF04      |
| 7M        | S7M          | Control   | M   | 1          | CM01      |
| 9M        | S9M          | Exposed   | M   | 3          | EM02      |

```
all <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S64M", "S6M", "S76F", "S7M", "S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M")
controls <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S64M", "S6M", "S76F", "S7M")
exposed <- c("S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M", "S77F", "S9M")
females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F", "S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F")
males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M","S13M", "S64M", "S6M", "S7M")
controls_males <- c("S13M", "S64M", "S6M", "S7M")
exposed_males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M")
controls_females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F")
exposed_females <- c("S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F")
```

# Loading log-fold data


```{r}
# Sample IDs whose log-fold tables are loaded. The order here fixes the order
# of `logfile_paths` and of the elements of `logdata_frames`.
sample_ids <- c(
  "S12M", "S13M", "S16F", "S19F", "S22F", "S23M", "S29F", "S31M", "S35F",
  "S36F", "S39F", "S3F", "S41F", "S44F", "S48M", "S50F", "S52F", "S53F",
  "S54F", "S59M", "S64M", "S6M", "S76F", "S77F", "S7M", "S9M"
)

# One input file per sample:
# ../output/65-exon-coverage/<sample>_exonsum_10_logfold.csv
logfile_paths <- file.path(
  "../output/65-exon-coverage",
  paste0(sample_ids, "_exonsum_10_logfold.csv")
)

# Fail early, listing every missing input, instead of stopping at the first
# file that read.csv() cannot open.
missing_files <- logfile_paths[!file.exists(logfile_paths)]
if (length(missing_files) > 0) {
  stop(
    "Missing log-fold input file(s): ",
    paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

# Read every file into a list of data frames named by sample ID.
logdata_frames <- setNames(lapply(logfile_paths, read.csv), sample_ids)

# At this point, `logdata_frames` is a named list of data frames.
# You can access each data frame by its sample ID, for example:
# logdata_frames$S12M  # the data frame read from S12M_exonsum_10_logfold.csv

```

```{r}
# Preview the first six rows of the S12M table (exact-name lookup).
head(logdata_frames[["S12M"]], n = 6L)
```
```{r}
# Stack the per-sample tables into one data frame. `SampleID` records the
# list-element name each row came from, and `Sex` is the last character of
# that ID.
combined_exon_df <- logdata_frames %>%
  bind_rows(.id = "SampleID") %>%
  select(SampleID, GeneID, starts_with("fold")) %>%
  mutate(Sex = str_sub(SampleID, start = -1L))
```

```{r}
 # Directory for the per-sex tables; created up front so write.csv() does not
 # fail when the directory does not exist yet.
 rfmt_out_dir <- "../output/72-exon-data-rfmt"
 dir.create(rfmt_out_dir, showWarnings = FALSE, recursive = TRUE)

 # Stack all per-sample tables once. `SampleID` is the list-element name and
 # `Sex` is the last character of that ID.
 exon_with_sex <- bind_rows(logdata_frames, .id = "SampleID") %>%
   select(SampleID, GeneID, starts_with("fold")) %>%
   mutate(Sex = substr(SampleID, nchar(SampleID), nchar(SampleID)))

 # NOTE(review): na.omit() drops every row with an NA in any column. If the
 # input tables have NA fold values (or differing sets of fold columns, which
 # bind_rows() fills with NA), those genes are removed here -- confirm that
 # this is intended.
 male_exon_df <- exon_with_sex %>%
   filter(Sex == "M") %>%
   na.omit()

 # View() opens an interactive data viewer, so only call it in an interactive
 # session; it has no place in a knitted (non-interactive) render.
 if (interactive()) {
   View(male_exon_df)
 }

 female_exon_df <- exon_with_sex %>%
   filter(Sex == "F") %>%
   na.omit()

 # Write the per-sex data frames to CSV files
 write.csv(
   male_exon_df,
   file.path(rfmt_out_dir, "male_exon_df.csv"),
   row.names = FALSE
 )
 write.csv(
   female_exon_df,
   file.path(rfmt_out_dir, "female_exon_df.csv"),
   row.names = FALSE
 )
```


# Reformatting to look like this


```
x gene01 gene02 gene03
sample_fold1
sample_fold2
x
x
sample2_fold
```

```{r}

```


```{r}
# Reshape the male table to wide format: one row per sample/fold-column
# combination (`SampleID_fold`) and one column per GeneID.
me_transformed <- male_exon_df %>%
  pivot_longer(
    cols = starts_with("fold"),
    names_to = "fold_number",
    values_to = "fold_value"
  ) %>%
  unite("SampleID_fold", SampleID, fold_number, sep = "_") %>%
  pivot_wider(names_from = GeneID, values_from = fold_value) %>%
  select(-Sex) %>%
  # Keep only rows whose SampleID_fold does not end in "fold1".
  filter(str_detect(SampleID_fold, "fold1$", negate = TRUE))
```


```{r}
# Reshape the female table to wide format: one row per sample/fold-column
# combination (`SampleID_fold`) and one column per GeneID.
fe_transformed <- female_exon_df %>%
  pivot_longer(
    cols = starts_with("fold"),
    names_to = "fold_number",
    values_to = "fold_value"
  ) %>%
  unite("SampleID_fold", SampleID, fold_number, sep = "_") %>%
  pivot_wider(names_from = GeneID, values_from = fold_value) %>%
  select(-Sex) %>%
  # Keep only rows whose SampleID_fold does not end in "fold1".
  filter(str_detect(SampleID_fold, "fold1$", negate = TRUE))
```


```{r}
 # Wide-format tables keyed by the CSV path each one is written to.
 transformed_outputs <- list(
   "../output/72-exon-data-rfmt/male_exon_tf.csv" = me_transformed,
   "../output/72-exon-data-rfmt/female_exon_tf.csv" = fe_transformed
 )
 # Write each table to its CSV file, without row names.
 for (csv_path in names(transformed_outputs)) {
   write.csv(transformed_outputs[[csv_path]], file = csv_path, row.names = FALSE)
 }
```