---
title: "73 gene methylation"
author: Steven Roberts
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
  html_document:
    theme: readable
    highlight: zenburn
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
  # github_document:
  #   toc: true
  #   toc_depth: 3
  #   number_sections: true
  #   html_preview: true
---

```{r setup, include=FALSE}
#library(kableExtra)
# library(DESeq2)
# library(pheatmap)
# library(RColorBrewer)
# library(data.table)
#library(DT)
# library(Biostrings)
#library(methylKit)
library(tidyverse)
knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = TRUE,         # Evaluate code chunks
  warning = TRUE,      # Show warnings
  message = TRUE,      # Show messages
  fig.width = 6,       # Set plot width in inches
  fig.height = 4,      # Set plot height in inches
  fig.align = "center" # Align plots to the center
)
```

Here we will try to grab gene-level methylation for each sample and look at
differences within sex between the OA treatment and the control.

Possibly something along the lines of

```
grouped_means_by_gene <- data %>%
  group_by(gene, Sex, Treatment) %>%
  summarise(
    mean_mean = mean(mean),
    mean_sem = mean(sem),
    .groups = 'drop'  # This drops the grouping structure afterwards
  )

# Printing the result
print(grouped_means_by_gene)
```

# Read in Data

We have two different outputs from early work.

```{r}
genemeth18 <- read.csv("../RAnalysis/data/meanmethgene_10x_allsampssex.csv")
```

```{r}
genemeth40 <- read.csv("../output/40-gene-methylation/40-gene-methylation.csv")
```

```{r}
meth_all <- read.delim(
  "../output/40-gene-methylation/meth_all-samples.out",
  header = FALSE
)
```

# Grouping 18

```{r}
# Average the per-sample mean and SEM within each gene x Sex x Treatment cell.
grouped18 <- genemeth18 %>%
  group_by(gene, Sex, Treatment) %>%
  summarise(
    mean_mean = mean(mean),
    mean_sem = mean(sem),
    .groups = "drop"  # This drops the grouping structure afterwards
  )

# Printing the result
print(grouped18)
```

```{r}
write_csv(grouped18, "../output/73-gene-methylation/grouped18.csv")
```

```{r}
# Per-gene methylation difference (Exposed - Control), computed within sex.

# For Females
female_data <- grouped18 %>%
  filter(Sex == "F") %>%
  group_by(gene) %>%
  summarize(
    meth_diff = mean_mean[Treatment == "Exposed"] -
      mean_mean[Treatment == "Control"]
  )

# For Males
male_data <- grouped18 %>%
  filter(Sex == "M") %>%
  group_by(gene) %>%
  summarize(
    meth_diff = mean_mean[Treatment == "Exposed"] -
      mean_mean[Treatment == "Control"]
  )
```

```{r}
write_csv(male_data, "../output/73-gene-methylation/male_data.csv")
write_csv(female_data, "../output/73-gene-methylation/female_data.csv")
```

# Metadata

| Sample.ID | OldSample.ID | Treatment | Sex | TreatmentN | Parent.ID |
|-----------|--------------|-----------|-----|------------|-----------|
| 12M       | S12M         | Exposed   | M   | 3          | EM05      |
| 13M       | S13M         | Control   | M   | 1          | CM04      |
| 16F       | S16F         | Control   | F   | 2          | CF05      |
| 19F       | S19F         | Control   | F   | 2          | CF08      |
| 22F       | S22F         | Exposed   | F   | 4          | EF02      |
| 23M       | S23M         | Exposed   | M   | 3          | EM04      |
| 29F       | S29F         | Exposed   | F   | 4          | EF07      |
| 31M       | S31M         | Exposed   | M   | 3          | EM06      |
| 35F       | S35F         | Exposed   | F   | 4          | EF08      |
| 36F       | S36F         | Exposed   | F   | 4          | EF05      |
| 39F       | S39F         | Control   | F   | 2          | CF06      |
| 3F        | S3F          | Exposed   | F   | 4          | EF06      |
| 41F       | S41F         | Exposed   | F   | 4          | EF03      |
| 44F       | S44F         | Control   | F   | 2          | CF03      |
| 48M       | S48M         | Exposed   | M   | 3          | EM03      |
| 50F       | S50F         | Exposed   | F   | 4          | EF01      |
| 52F       | S52F         | Control   | F   | 2          | CF07      |
| 53F       | S53F         | Control   | F   | 2          | CF02      |
| 54F       | S54F         | Control   | F   | 2          | CF01      |
| 59M       | S59M         | Exposed   | M   | 3          | EM01      |
| 64M       | S64M         | Control   | M   | 1          | CM05      |
| 6M        | S6M          | Control   | M   | 1          | CM02      |
| 76F       | S76F         | Control   | F   | 2          | CF04      |
| 77F       | S77F         | Exposed   | F   | 4          | EF04      |
| 7M        | S7M          | Control   | M   | 1          | CM01      |
| 9M        | S9M          | Exposed   | M   | 3          | EM02      |

```
all <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F",
         "S64M", "S6M", "S76F", "S7M", "S12M", "S22F", "S23M", "S29F",
         "S31M", "S35F", "S36F", "S3F", "S41F", "S48M", "S50F", "S59M",
         "S77F", "S9M")

controls <- c("S13M", "S16F", "S19F", "S39F", "S44F", "S52F", "S53F",
              "S54F", "S64M", "S6M", "S76F", "S7M")

exposed <- c("S12M", "S22F", "S23M", "S29F", "S31M", "S35F", "S36F", "S3F",
             "S41F", "S48M", "S50F", "S59M", "S77F", "S9M")

females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F", "S76F",
             "S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F", "S77F")

males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M", "S13M", "S64M",
           "S6M", "S7M")

controls_males <- c("S13M", "S64M", "S6M", "S7M")

exposed_males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M")

controls_females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F",
                      "S76F")

exposed_females <- c("S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F",
                     "S77F")
```

# Wrangling output from 40

```{r}
genemeth40 <- read.csv("../output/40-gene-methylation/40-gene-methylation.csv")
```

```{r}
meth_all <- read.delim(
  "../output/40-gene-methylation/meth_all-samples.out",
  header = FALSE
)
```

```{r}
# NOTE(review): `methylation.transposed.rownames` is not created anywhere in
# this document, so knitting in a fresh session will fail here. It presumably
# comes from the 40-gene-methylation workflow -- TODO confirm and load or
# rebuild it before this chunk.
head(methylation.transposed.rownames)
```

```{r}
# NOTE(review): `methylation.transposed.matrix` is likewise not defined in
# this document -- TODO confirm its source.
head(methylation.transposed.matrix)
```

Let's grab just the male data.

```{r}
# Keep the gene name plus every column whose name ends in "M".
methmale <- methylation.transposed.rownames %>%
  select(name, ends_with("M"))
```

```{r}
controls_males <- c("S13M", "S64M", "S6M", "S7M")
exposed_males <- c("S12M", "S23M", "S31M", "S48M", "S59M", "S9M")
```

```{r}
# Coerce the per-sample columns to numeric, then average across the control
# and exposed males for each row. The sample sets come from the vectors
# defined in the previous chunk so the two lists cannot drift apart.
methmale02 <- methmale %>%
  mutate(across(all_of(c(controls_males, exposed_males)), as.numeric)) %>%
  mutate(
    average_control = rowMeans(select(., all_of(controls_males)),
                               na.rm = TRUE),
    average_exposed = rowMeans(select(., all_of(exposed_males)),
                               na.rm = TRUE)
  ) %>%
  select(name, average_control, average_exposed)

# Written as a separate step so `methmale02` keeps the table rather than the
# NULL returned by write.csv(). write.csv() always writes the header row, so
# no `col.names` argument is passed (it would be ignored with a warning).
write.csv(
  methmale02,
  "../output/73-gene-methylation/male_40_meth.csv",
  row.names = FALSE,
  quote = FALSE
)
```

# Female data based on 40

```
controls_females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F",
                      "S76F")

exposed_females <- c("S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F",
                     "S77F")
```

```{r}
# Keep the gene name plus every column whose name ends in "F".
methfemale <- methylation.transposed.rownames %>%
  select(name, ends_with("F"))
```

```{r}
controls_females <- c("S16F", "S19F", "S39F", "S44F", "S52F", "S53F", "S54F",
                      "S76F")
exposed_females <- c("S22F", "S29F", "S35F", "S36F", "S3F", "S41F", "S50F",
                     "S77F")
```

```{r}
# Coerce the per-sample columns to numeric, then average across the control
# and exposed females for each row. Using `controls_females` directly ensures
# all eight control samples (including S76F) contribute to the control mean.
methfemale02 <- methfemale %>%
  mutate(across(all_of(c(controls_females, exposed_females)), as.numeric)) %>%
  mutate(
    average_control = rowMeans(select(., all_of(controls_females)),
                               na.rm = TRUE),
    average_exposed = rowMeans(select(., all_of(exposed_females)),
                               na.rm = TRUE)
  ) %>%
  select(name, average_control, average_exposed)

# Written as a separate step so `methfemale02` keeps the table rather than the
# NULL returned by write.csv(). write.csv() always writes the header row, so
# no `col.names` argument is passed (it would be ignored with a warning).
write.csv(
  methfemale02,
  "../output/73-gene-methylation/female_40_meth.csv",
  row.names = FALSE,
  quote = FALSE
)
```