---
title: "Ptuh known miRNA interactions"
author: "Jill Ashey"
date: "2025-02-07"
output: html_document
---

This script will use topGO to analyze functional enrichment of targets of known miRNAs for Ptuh 

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

library(topGO)
library(tidyverse)
```

Read in shortstack results 
```{r}
data <- read.delim("../../F-Ptuh/output/05-Ptuh-sRNA-ShortStack_4.1.0/ShortStack_out/Results.txt")
head(data)
```

Filter for miRNAs and known miRNAs
```{r}
data <- data %>%
  filter(MIRNA == "Y") 
dim(data) # should be 37

data_known <- data[!is.na(data$known_miRNAs), ]
```

There are 9 miRNAs previously identified in other studies. Here are the ones that I want to investigate further:
- Cluster_1116 = miR-2036
- Cluster_1296 = miR-100
- Cluster_1793 = miR-2023
- Cluster_2973 = miR-2030
- Cluster_4039 = miR-2025

# miR-2036

## Functional enrichment of targets negatively correlated with miR-2036 (regardless of correlation significance)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(miRNA == "Cluster_1116")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

Read in gene-to-go-mappings
```{r}
gene2go_topgo <- readMappings("../../M-multi-species/data/Pmea_gene2go.tab", IDsep = ";", sep = "\t")
```

Set function to select genes of interest (ie those that have pvalue < 0.05)
```{r}
topDiffGenes <- function(allScore) {
return(allScore < 0.05)}
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2036-topGO_neg_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2036-topGO_neg_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2036-topGO_neg_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets positively correlated with miR-2036 (regardless of correlation significance)

Filter so that only negative correlations remain 
```{r}
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(miRNA == "Cluster_1116")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(pos_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2036-topGO_pos_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2036-topGO_pos_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2036-topGO_pos_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets significantly negatively correlated with miR-2036

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(p_value < 0.05) %>%
  filter(miRNA == "Cluster_1116")
```

There are no significantly negative targets. 

## Functional enrichment of targets significantly positively correlated with miR-2036 

Filter so that only negative correlations remain 
```{r}
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(p_value < 0.05) %>%
  filter(miRNA == "Cluster_1116")
```

There are no significantly positive targets. 

# miR-100

## Functional enrichment of targets negatively correlated with miR-100 (regardless of correlation pvalue)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(miRNA == "Cluster_1296")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

Read in gene-to-go-mappings
```{r}
gene2go_topgo <- readMappings("../../M-multi-species/data/Pmea_gene2go.tab", IDsep = ";", sep = "\t")
```

Set function to select genes of interest (ie those that have pvalue < 0.05)
```{r}
topDiffGenes <- function(allScore) {
return(allScore < 0.05)}
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir100-topGO_neg_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir100-topGO_neg_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir100-topGO_neg_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets positively correlated with miR-100 (regardless of correlation significance)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(miRNA == "Cluster_1296")
```


Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(pos_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `pos_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(pos_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir100-topGO_pos_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir100-topGO_pos_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir100-topGO_pos_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets significantly negatively correlated with miR-100 

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(p_value < 0.05) %>%
  filter(miRNA == "Cluster_1296")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

There are no annotations for the significant genes. 

## Functional enrichment of targets significantly positively correlated with miR-100

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only positive correlations remain 
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(p_value < 0.05) %>%
  filter(miRNA == "Cluster_1296")
```

Only one sig positively correlated pair and no GO terms are assigned to it. 

# miR-2023

## Functional enrichment of targets negatively correlated with miR-2023 (regardless of correlation pvalue)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(miRNA == "Cluster_1793")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

Read in gene-to-go-mappings
```{r}
gene2go_topgo <- readMappings("../../M-multi-species/data/Pmea_gene2go.tab", IDsep = ";", sep = "\t")
```

Set function to select genes of interest (ie those that have pvalue < 0.05)
```{r}
topDiffGenes <- function(allScore) {
return(allScore < 0.05)}
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_neg_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_neg_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_neg_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets positively correlated with miR-100 (regardless of correlation significance)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(miRNA == "Cluster_1296")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(pos_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `pos_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(pos_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_pos_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_pos_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_pos_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets significantly negatively correlated with miR-2023 

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(p_value < 0.05) %>%
  filter(miRNA == "Cluster_1793")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

Read in gene-to-go-mappings
```{r}
gene2go_topgo <- readMappings("../../M-multi-species/data/Pmea_gene2go.tab", IDsep = ";", sep = "\t")
```

Set function to select genes of interest (ie those that have pvalue < 0.05)
```{r}
topDiffGenes <- function(allScore) {
return(allScore < 0.05)}
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_sig_neg_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_sig_neg_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_sig_neg_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets significantly positively correlated with miR-2023

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(miRNA == "Cluster_1793")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(pos_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `pos_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(pos_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_sig_pos_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_sig_pos_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2023-topGO_sig_pos_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

# miR-2030

## Functional enrichment of targets negatively correlated with miR-2030 (regardless of correlation pvalue)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(miRNA == "Cluster_2973")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

Read in gene-to-go-mappings
```{r}
gene2go_topgo <- readMappings("../../M-multi-species/data/Pmea_gene2go.tab", IDsep = ";", sep = "\t")
```

Set function to select genes of interest (ie those that have pvalue < 0.05)
```{r}
topDiffGenes <- function(allScore) {
return(allScore < 0.05)}
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_neg_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_neg_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_neg_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets positively correlated with miR-2030 (regardless of correlation significance)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(miRNA == "Cluster_2973")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(pos_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `pos_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(pos_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_pos_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_pos_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_pos_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets significantly negatively correlated with miR-2030 

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(p_value < 0.05) %>%
  filter(miRNA == "Cluster_2973")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

Read in gene-to-go-mappings
```{r}
gene2go_topgo <- readMappings("../../M-multi-species/data/Pmea_gene2go.tab", IDsep = ";", sep = "\t")
```

Set function to select genes of interest (ie those that have pvalue < 0.05)
```{r}
topDiffGenes <- function(allScore) {
return(allScore < 0.05)}
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_sig_neg_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_sig_neg_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_sig_neg_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets significantly positively correlated with miR-2023

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(miRNA == "Cluster_1296")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(pos_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `pos_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(pos_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_sig_pos_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_sig_pos_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2030-topGO_sig_pos_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

# miR-2025

## Functional enrichment of targets negatively correlated with miR-2025 (regardless of correlation pvalue)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(miRNA == "Cluster_4039")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

Read in gene-to-go-mappings
```{r}
gene2go_topgo <- readMappings("../../M-multi-species/data/Pmea_gene2go.tab", IDsep = ";", sep = "\t")
```

Set function to select genes of interest (ie those that have pvalue < 0.05)
```{r}
topDiffGenes <- function(allScore) {
return(allScore < 0.05)}
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_neg_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_neg_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_neg_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets positively correlated with miR-2025 (regardless of correlation significance)

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(miRNA == "Cluster_4039")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(pos_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `pos_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(pos_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_pos_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_pos_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_pos_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets significantly negatively correlated with miR-2025 

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
neg_corr_data <- pcc_data %>%
  filter(PCC.cor < 0) %>%
  filter(p_value < 0.05) %>%
  filter(miRNA == "Cluster_4039")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(neg_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

Read in gene-to-go-mappings
```{r}
gene2go_topgo <- readMappings("../../M-multi-species/data/Pmea_gene2go.tab", IDsep = ";", sep = "\t")
```

Set function to select genes of interest (ie those that have pvalue < 0.05)
```{r}
topDiffGenes <- function(allScore) {
return(allScore < 0.05)}
```

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `neg_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(neg_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_sig_neg_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_sig_neg_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_sig_neg_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```

## Functional enrichment of targets significantly positively correlated with miR-2025

Read in PCC miranda data
```{r}
pcc_data <- read.csv("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-miranda_PCC_miRNA_mRNA.csv")
head(pcc_data)

# Filter so that only negative correlations remain 
pos_corr_data <- pcc_data %>%
  filter(PCC.cor > 0) %>%
  filter(miRNA == "Cluster_4039")
```

Read in gene2go information 
```{r}
ptuh_gene2go <- read.delim("../../M-multi-species/data/Pmea_gene2go.tab", sep = "\t")
colnames(ptuh_gene2go) <- c("mRNA", "GO.ID")
```

Make list of genes for input to topGO
```{r}
# Genes of interest - ie those targeted by miRNAs
target_genes <- as.character(unique(pos_corr_data$mRNA))

# All genes 
all_genes <- as.character(ptuh_gene2go$mRNA)

# Apply 1 or 0 if gene is gene of interest 
GeneList <- factor(as.integer(all_genes %in% target_genes))
names(GeneList) <- all_genes
```

The following code will perform GO enrichment using the weighted Fisher's exact test to assess whether specific GO terms are overrepresented in the genes targeted by miRNAs, regardless of correlation significance. 

### Biological Processes

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_BP <-new("topGOdata", ontology="BP", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_BP_FE <- runTest(GO_BP, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_BP_En <- GenTable(GO_BP, Fisher = GO_BP_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_BP_En$Fisher<-as.numeric(GO_BP_En$Fisher)
GO_BP_En_sig<-GO_BP_En[GO_BP_En$Fisher<0.05,]
```

Merge `GO_BP_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_BP_En_sig$GO.ID <- trimws(GO_BP_En_sig$GO.ID)

# Join the datasets based on GO term
GO_BP_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_BP_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_BP_En_sig_gene$ontology <- "Biological Processes"

# Keep only unique rows 
GO_BP_En_sig_gene <- unique(GO_BP_En_sig_gene)
```

### Cellular Components

### Molecular Functions

Create `topGOdata` object, which is required for topGO analysis
```{r}
GO_MF <-new("topGOdata", ontology="MF", gene2GO=gene2go_topgo, allGenes=GeneList, annot = annFUN.gene2GO, geneSel=topDiffGenes)
```

Run GO enrichment test 
```{r}
GO_MF_FE <- runTest(GO_MF, algorithm="weight01", statistic="fisher")
```

Generate results table 
```{r}
GO_MF_En <- GenTable(GO_MF, Fisher = GO_MF_FE, orderBy = "Fisher",  topNodes = 100, numChar = 51)
```
Only taking the top 100 GO terms 

Filter by significant results
```{r}
GO_MF_En$Fisher<-as.numeric(GO_MF_En$Fisher)
GO_MF_En_sig<-GO_MF_En[GO_MF_En$Fisher<0.05,]
```

Merge `GO_MF_En_sig` with GO and gene info. 
```{r}
# Separate GO terms 
ptuh_gene2go <- ptuh_gene2go %>%
  separate_rows(GO.ID, sep = ";")

# Ensure GO terms in both datasets are formatted similarly (trim whitespaces)
ptuh_gene2go$GO.ID <- trimws(ptuh_gene2go$GO.ID)
GO_MF_En_sig$GO.ID <- trimws(GO_MF_En_sig$GO.ID)

# Join the datasets based on GO term
GO_MF_En_sig_gene <- ptuh_gene2go %>%
  left_join(GO_MF_En_sig, by = "GO.ID") %>%
  na.omit()

# Add ontology column 
GO_MF_En_sig_gene$ontology <- "Molecular Functions"

# Keep only unique rows 
GO_MF_En_sig_gene <- unique(GO_MF_En_sig_gene)
```

### Join ontologies 

Bind so there is a df that has significantly enriched GO terms for all ontologies 
```{r}
GO_neg_corr_df <- rbind(GO_BP_En_sig_gene, GO_MF_En_sig_gene)
```

Merge with `pos_corr_data`
```{r}
test <- GO_neg_corr_df %>%
  inner_join(pos_corr_data, by = "mRNA") #%>%
  #mutate(direction = ifelse(PCC.cor > 0, "Positive", "Negative")) #%>%
  #filter(ontology != "Cellular Components") #%>%
  #filter(p_value < 0.1)

# Save as csv 
write.csv(test, "../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_sig_pos_corr_target_enrichment.csv")
```

Plot!
```{r}
plot<-ggplot(test, aes(x = Term, y = Fisher, fill = p_value)) +
  #ylim(0, 1) +
  #geom_hline(yintercept = 0.05, linetype = "solid", color = "black", linewidth = 1)+
  #geom_hline(yintercept = 0.05, color = "black", linetype = "solid", linewidth = 0.5) +  # Add line at 0.05 
  geom_point(shape = 21, size = 5) + 
  #scale_size(range = c(2, 20)) + 
  xlab('') + 
  ylab("Fisher p-value") +
  theme_bw(base_size = 24) +
  facet_grid(vars(ontology), scales = "free", space = "free_y") +
  coord_flip(); plot

# Save plot 
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_sig_pos_corr_target_enrichment.pdf", plot, width = 20, height = 35, dpi = 300)
ggsave("../../F-Ptuh/output/11-Ptuh-mRNA-miRNA-interactions/Ptuh-mir2025-topGO_sig_pos_corr_target_enrichment.png", plot, width = 20, height = 35, dpi = 300)
```