--- title: "Exon-intersect-meth" output: html_document --- #Load necessary libraries ```{r} library(tidyverse) ``` #add exon number to exon bed file ```{r} exons <- read.table("~/ceabigr/genome-features/C_virginica-3.0_Gnomon_exon_sorted_yrv.bed", sep = "\t", header = F) exon.rle <- rle(exons$V1) exons$exon_name <- paste0(rep(exon.rle$values, times = exon.rle$lengths), "_", unlist(lapply(exon.rle$lengths, seq_len))) ``` # CODE TO GET METHYLATION INTERSECT WITH Exons ```{bash} cd /home/shared/8TB_HDD_01/sr320/github/ceabigr/data FILES=$(ls *bedgraph) for file in ${FILES} do NAME=$(echo ${file} | awk -F "_" '{print $1}') echo ${NAME} /home/shared/bedtools2/bin/intersectBed \ -wb \ -a ${NAME}_R1_val_1_10x.bedgraph \ -b ../genome-features/C_virginica-3.0_Gnomon_exon_sorted_yrv.bed \ > ~/ceabigr/output/${NAME}_mExon.out done ``` # Get gene percent methylation by calculating the mean and median Loci percent methylation for each sample #Want to see unique transposable elements through 26 different files/samples ```{r} # create a vector of filenames with full path filenames <- list.files(path = "~/ceabigr/output", pattern = "_mExon.out", full.names = TRUE) b <- data.frame() # create empty dataframe to be populated with mean & median summary stats for each feature within a sample for (i in 1:length(filenames)) { print(filenames[i]) # print out file location and name testMte <- read.csv(file = filenames[i], sep = "\t", header = FALSE) # read in each sample data # summarize methylation data per feature, chromosome, start, and end position with mean & median group12 <- testMte %>% group_by(V5,V6,V7) %>% summarize(avg = mean(V4, na.rm=TRUE), median=median(V4, na.rm=TRUE)) %>% # add new column with sample name mutate(sample=gsub("/home/shared/8TB_HDD_02/strigg/ceabigr/output/", "", filenames[i])) # combine feature mean and median methylation for each sample in one common dataframe called b b <- rbind(b, group12) } #rename columns #rename columns colnames(b) <- c("chr","start","stop","mean_meth", "median_meth", "sample") b$sample <- gsub("mExon.out","", b$sample) #write out dataframe write.table(b, file = "~/ceabigr/output/exon_summary_allsamples.txt",quote = F, row.names = F, sep = "\t") ```