---
title: "mRNA-miRNA interactions"
author: "Jill Ashey"
date: "2024-10-01"
output: html_document
---

I attempted to use Part 1 of the R package [mirTarRnaSeq](https://bioconductor.org/packages/3.13/bioc/html/mirTarRnaSeq.html), which an be used for interactive mRNA miRNA sequencing statistical analysis, to assess coexpression between mRNAs and miRNAs but ran into a lot of issues getting my data to run properly. After meeting with the e5 group, we decided to examine correlations based on expression levels instead. 

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

#BiocManager::install("SPONGE")

library(tidyverse)
#library(mirTarRnaSeq)
library(reshape2)
#library(SPONGE)
library(pheatmap)
library(energy)
library(parallel)
library(ggraph)
library(tidygraph)
library(igraph)
library(genefilter)
library(gridExtra)
```

Read in mRNA data 
```{r}
mRNA_counts <- read_csv("../../D-Apul/output/07-Apul-Hisat/gene_count_matrix.csv")
mRNA_counts <- as.data.frame(mRNA_counts)
rownames(mRNA_counts) <- mRNA_counts[,1] #set first column that contains gene names as rownames
mRNA_counts <- mRNA_counts[,-1] # remove column w/ gene names 

# Remove any genes with 0 counts across samples 
mRNA_counts<-mRNA_counts %>%
     mutate(Total = rowSums(.[, 1:5]))%>%
    filter(!Total==0)%>%
    dplyr::select(!Total)
```

Read in miRNA data
```{r}
miRNA_counts <- read.delim("../../D-Apul/output/03.1-Apul-sRNA-summary/Apul_counts_miRNA_normalized.txt")
head(miRNA_counts)

# Remove sample150 column from mirna counts and remove any miRNAs with 0 for all samples 
miRNA_counts <- miRNA_counts %>%
  #select(-sample150) %>%
     mutate(Total = rowSums(.[, 1:5]))%>%
    filter(!Total==0)%>%
    dplyr::select(!Total)

# Rename gene count cols to match miRNA count cols
colnames(mRNA_counts) <- c("sample140", "sample145", "sample150", "sample173", "sample178")
```

Read in miranda data 
```{r}
miranda_apul <- read.delim("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/miranda_strict_all_1kb_parsed_apul_updated.txt", header = F)
colnames(miranda_apul) <- c("miRNA", "mRNA", "score", "energy", "query_start_end", "subject_start_end", "total_bp_shared", "query_similar", "subject_similar")

# Format miranda df 
miranda_apul$miRNA <- sub("^>", "", miranda_apul$miRNA)  # Remove leading ">"
miranda_apul$miRNA <- sub("\\..*", "", miranda_apul$miRNA)  # Remove everything from the first period onwards
miranda_apul$mRNA <- sub("::.*", "", miranda_apul$mRNA)  # Remove everything from "::" onwards

length(unique(miranda_apul$miRNA))
length(unique(miranda_apul$mRNA))
dim(miranda_apul)
```

Summarize how many mRNAs each miRNA are predicted to interact with. Summarize how many miRNAs each mRNA are predicted to interact with. 
```{r}
sum_mirna <- miranda_apul %>%
  group_by(miRNA) %>%
  summarise(mRNA_count = n_distinct(mRNA)) %>% 
  arrange(desc(mRNA_count))

ggplot(sum_mirna %>% arrange(desc(mRNA_count)), 
       aes(x = forcats::fct_reorder(miRNA, mRNA_count), y = mRNA_count)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "miRNA", y = "mRNA Count", title = "miRNA Regulation of mRNAs") #+
  #coord_flip()  # Optional: horizontal bar plot for better readability
#ggsave("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-miRNA_number_of_targets.png", last_plot(), width = 12, height = 10, dpi = 300)
#ggsave("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-miRNA_number_of_targets.pdf", last_plot(), width = 12, height = 10, dpi = 300)

# mRNA
sum_mrna <- miranda_apul %>%
  group_by(mRNA) %>%
  summarise(miRNA_count = n_distinct(miRNA))
```

Normalize counts
```{r}
# Function to normalize counts (simple RPM normalization)
normalize_counts <- function(counts) {
  rpm <- t(t(counts) / colSums(counts)) * 1e6
  return(rpm)
}

# Normalize miRNA and mRNA counts
miRNA_norm <- normalize_counts(miRNA_counts)
#miRNA_norm <- as.matrix(miRNA_counts_filt)

mRNA_norm <- normalize_counts(mRNA_counts)
#mRNA_norm <- as.matrix(mRNA_counts_filt)
```

Run distance correlation using the [energy package in R](https://cran.r-project.org/web/packages/energy/index.html) with rows as features (ie mRNA or miRNA) and columns as samples. 
```{r}
# Initialize a matrix to store the results
dcor_matrix <- matrix(0, nrow = nrow(miRNA_norm), ncol = nrow(mRNA_norm))
rownames(dcor_matrix) <- rownames(miRNA_norm)
colnames(dcor_matrix) <- rownames(mRNA_norm)

# Calculate distance correlation for each miRNA-mRNA pair
for (i in 1:nrow(miRNA_norm)) {
  for (j in 1:nrow(mRNA_norm)) {
    dcor_matrix[i, j] <- dcor(miRNA_norm[i,], mRNA_norm[j,])
  }
}

# Melt data
dcor_melted <- melt(dcor_matrix)
colnames(dcor_melted) <- c("miRNA", "mRNA", "Distance_Correlation")
head(dcor_melted)
```

The `dcor` function calculates distance correlation between each mRNA and miRNA. The output value ranges from 0 to 1, where 0 indicates independence and 1 indicates perfect dependence. Unlike Pearson's correlation, this calculation can detect non-linear relationships between mRNA and miRNAs. However, it does not calculate directionality of the relationship. 

I will use the signed distance correlation to indicate overall linear trend in the data using Pearson's correlation. I will also use several methods to obtain pvalues for distance correlation: permutation test, chi-square test, and student's t-distribution. I wrote functions to calculate these things below.
```{r}
# Function to calculate signed distance correlation
signed_dcor <- function(x, y) {
  dc <- dcor(x, y)
  sign <- sign(cor(x, y, method = "pearson"))
  return(sign * dc)
}

# Function to calculate p-value using permutation test
dcor_pvalue_perm <- function(x, y, R = 1000) { # can change R value to make less computationally intense 
  observed_dcor <- dcor(x, y)
  n <- length(x)
  
  permuted_dcors <- replicate(R, {
    perm_y <- sample(y)
    dcor(x, perm_y)
  })
  
  pvalue <- mean(permuted_dcors >= observed_dcor)
  return(pvalue)
}

# Function to calculate p-value using chi-square approximation
dcor_pvalue_chisq <- function(x, y) {
  n <- length(x)
  dc <- dcor(x, y)
  statistic <- n * dc^2
  pvalue <- pchisq(statistic, df = 1, lower.tail = FALSE)
  return(pvalue)
}

# Initialize matrices to store results
signed_dcor_matrix <- matrix(0, nrow = nrow(miRNA_norm), ncol = nrow(mRNA_norm))
pvalue_perm_matrix <- matrix(1, nrow = nrow(miRNA_norm), ncol = nrow(mRNA_norm))
pvalue_chisq_matrix <- matrix(1, nrow = nrow(miRNA_norm), ncol = nrow(mRNA_norm))
pvalue_t_matrix <- matrix(1, nrow = nrow(miRNA_norm), ncol = nrow(mRNA_norm))

rownames(signed_dcor_matrix) <- rownames(miRNA_norm)
colnames(signed_dcor_matrix) <- rownames(mRNA_norm)
rownames(pvalue_perm_matrix) <- rownames(miRNA_norm)
colnames(pvalue_perm_matrix) <- rownames(mRNA_norm)
rownames(pvalue_chisq_matrix) <- rownames(miRNA_norm)
colnames(pvalue_chisq_matrix) <- rownames(mRNA_norm)
rownames(pvalue_t_matrix) <- rownames(miRNA_norm)
colnames(pvalue_t_matrix) <- rownames(mRNA_norm)

# Calculate signed distance correlation and p-values for each miRNA-mRNA pair
for (i in 1:nrow(miRNA_norm)) {
  for (j in 1:nrow(mRNA_norm)) {
    signed_dcor_matrix[i, j] <- signed_dcor(miRNA_norm[i,], mRNA_norm[j,])
    pvalue_perm_matrix[i, j] <- dcor_pvalue_perm(miRNA_norm[i,], mRNA_norm[j,])
    pvalue_chisq_matrix[i, j] <- dcor_pvalue_chisq(miRNA_norm[i,], mRNA_norm[j,])
    pvalue_t_matrix[i, j] <- dcor_pvalue_t(miRNA_norm[i,], mRNA_norm[j,])
  }
}

# Melt 
signed_dcor_matrix_melt <- melt(signed_dcor_matrix)
colnames(signed_dcor_matrix_melt) <- c("miRNA", "mRNA", "dcor_corr_direction")

pvalue_perm_matrix_melt <- melt(pvalue_perm_matrix)
colnames(pvalue_perm_matrix_melt) <- c("miRNA", "mRNA", "pvalue_perm")

pvalue_chisq_matrix_melt <- melt(pvalue_chisq_matrix)
colnames(pvalue_chisq_matrix_melt) <- c("miRNA", "mRNA", "pvalue_chisq")

pvalue_t_matrix_melt <- melt(pvalue_t_matrix)
colnames(pvalue_t_matrix_melt) <- c("miRNA", "mRNA", "pvalue_t")
```

The for loop will take a while to run, as it is computationally intensive. 

Join all dfs so that there is one df with mRNA, miRNA, distance correlation, directionality, and various pvalue information. 
```{r}
all_dcor <- cbind(dcor_melted, signed_dcor_matrix_melt[,3], pvalue_perm_matrix_melt[,3], pvalue_chisq_matrix_melt[,3], pvalue_t_matrix_melt[,3])
colnames(all_dcor) <- c("miRNA", "mRNA", "Distance_Correlation", "dcor_corr_direction", "pvalue_perm", "pvalue_chisq", "pvalue_t")
head(all_dcor)
```

Merge with miranda data 
```{r}
combined_data <- all_dcor %>%
  inner_join(miranda_apul, by = c("miRNA", "mRNA")) #%>%
  #filter(dcor_corr_direction != 0)
head(combined_data)
```

Investigate data 
```{r}
# How many p-values are < 0.05 or < 0.1?
pvalue_summary <- combined_data %>%
  summarise(
    pvalue_perm_gt_0.05 = sum(pvalue_perm < 0.05),
    pvalue_perm_gt_0.1 = sum(pvalue_perm < 0.1),
    pvalue_chisq_gt_0.05 = sum(pvalue_chisq < 0.05),
    pvalue_chisq_gt_0.1 = sum(pvalue_chisq < 0.1),
    pvalue_t_gt_0.05 = sum(pvalue_t < 0.05),
    pvalue_t_gt_0.1 = sum(pvalue_t < 0.1)
  )
print(pvalue_summary)

# How many pairs have a distance correlation > 0.5?
dc_gt_0.5 <- sum(combined_data$Distance_Correlation > 0.5)
cat("\nPairs with Distance Correlation > 0.5:", dc_gt_0.5, "\n")

# How many have a dcor_corr_direction that does not equal 0?
dcor_dir_not_0 <- sum(combined_data$dcor_corr_direction != 0)
cat("Pairs with dcor_corr_direction != 0:", dcor_dir_not_0, "\n")

# Are there any pairs that have a dcor_corr_direction that does not equal 0 and a p-value < 0.05?
pairs_of_interest <- combined_data %>%
  filter(dcor_corr_direction != 0 & 
         (pvalue_perm < 0.05 | pvalue_chisq < 0.05 | pvalue_t < 0.05))
cat("Pairs with dcor_corr_direction != 0 and any p-value < 0.05:", nrow(pairs_of_interest), "\n")

# Are there any pairs that have a Distance correlation > 0.5 and a p-value < 0.05?
pairs_of_interest_dist <- combined_data %>%
  filter(Distance_Correlation > 0.5 & 
         (pvalue_perm < 0.05 | pvalue_chisq < 0.05 | pvalue_t < 0.05))
cat("Pairs with Distance correlation > 0.5 and any p-value < 0.05:", nrow(pairs_of_interest_dist), "\n")
```

miRNA Cluster_1826 and mRNA FUN_019563 were identified as a pair of interest. 

Visualize 
```{r}
ggplot(combined_data, aes(x = Distance_Correlation, y = dcor_corr_direction)) +
  geom_point(alpha = 0.5) +
  theme_minimal() +
  labs(title = "Distance Correlation vs Correlation Direction",
       x = "Distance Correlation",
       y = "Correlation Direction")

# Create an edge list from the miRNA and mRNA columns
edges <- pairs_of_interest_dist[, c("miRNA", "mRNA")]

# Create the graph
g <- graph_from_data_frame(edges, directed = FALSE)

# Add edge attributes
E(g)$weight <- pairs_of_interest_dist$Distance_Correlation
E(g)$direction <- pairs_of_interest_dist$dcor_corr_direction
E(g)$score <- pairs_of_interest_dist$score
E(g)$energy <- pairs_of_interest_dist$energy

# Plot the graph
plot(g,
     edge.width = E(g)$weight * 5,  # Adjust edge width based on correlation
     edge.color = ifelse(E(g)$direction > 0, "blue", "red"),  # Color based on direction
     vertex.size = 10,
     vertex.label.cex = 0.8,
     layout = layout_with_fr(g))
```

Instead of distance correlation, calculate PCC instead 
```{r}
# Function to calculate PCC and p-value for a pair of vectors
calc_pcc <- function(x, y) {
  result <- cor.test(x, y, method = "pearson")
  return(c(PCC = result$estimate, p_value = result$p.value))
}

# Create a data frame of all miRNA-mRNA pairs
pairs <- expand.grid(miRNA = rownames(miRNA_norm), mRNA = rownames(mRNA_norm))

# Calculate PCC and p-value for each pair
pcc_results <- pairs %>%
  rowwise() %>%
  mutate(
    pcc_stats = list(calc_pcc(miRNA_norm[miRNA,], mRNA_norm[mRNA,]))
  ) %>%
  unnest_wider(pcc_stats)

# Adjust p-values for FDR
pcc_results <- pcc_results %>%
  mutate(adjusted_p_value = p.adjust(p_value, method = "fdr"))

# Save as csv
#write.csv(pcc_results, "../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-PCC_miRNA_mRNA.csv")
```

Merge with miranda data 
```{r}
combined_data_pcc <- pcc_results %>%
  inner_join(miranda_apul, by = c("miRNA", "mRNA"))
head(combined_data_pcc)
length(unique(combined_data_pcc$miRNA))
length(unique(combined_data_pcc$mRNA))

# Save as csv
write.csv(combined_data_pcc, "../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/miranda_PCC_miRNA_mRNA.csv")
```

Read data back in so PCC does not have to be run again
```{r}
combined_data_pcc <- read.csv("../output/09-Apul-mRNA-miRNA-interactions/miranda_PCC_miRNA_mRNA.csv")
```

Plot number of positive and negative interactions for each miRNA
```{r}
# Summarize the data
summary_data <- combined_data_pcc %>%
  group_by(miRNA) %>%
  summarise(
    positive_count = sum(PCC.cor > 0),
    negative_count = sum(PCC.cor < 0)
  ) %>%
  mutate(total_count = positive_count + negative_count) %>%
  arrange(desc(total_count))
sum(summary_data$positive_count)
sum(summary_data$negative_count)

# Assess how many miRNAs have more positive correlations vs more negative ones 
positive_dominant <- sum(summary_data$positive_count > summary_data$negative_count)
negative_dominant <- sum(summary_data$negative_count > summary_data$positive_count)
equal <- sum(summary_data$positive_count == summary_data$negative_count)

cat("miRNAs with more positive correlations:", positive_dominant, "\n")
cat("miRNAs with more negative correlations:", negative_dominant, "\n")
cat("miRNAs with equal positive and negative correlations:", equal, "\n")

# Reshape the data for plotting
plot_data <- summary_data %>%
  tidyr::pivot_longer(cols = c(positive_count, negative_count),
                      names_to = "correlation_type",
                      values_to = "count")

# Create the stacked bar plot
ggplot(plot_data, aes(x = reorder(miRNA, -total_count), y = count, fill = correlation_type)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = "miRNA", y = "Count", fill = "Correlation Type",
       title = "Positive and Negative PCC Correlations per miRNA")
ggsave("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-miRNA_positive_negative_correlations.png", last_plot(), width = 12, height = 10, dpi = 300)
ggsave("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-miRNA_positive_negative_correlations.pdf", last_plot(), width = 12, height = 10, dpi = 300)
```

Investigate data 
```{r}
# How many p-values are < 0.05 or < 0.1?
pvalue_summary_pcc <- combined_data_pcc %>%
  summarise(
    pvalue_0.05 = sum(p_value < 0.05),
    pvalue_0.1 = sum(p_value < 0.1),
  )
print(pvalue_summary_pcc)

# How many pairs have a PCC correlation > |0.5|?
corr_0.5 <- sum(abs(combined_data_pcc$PCC.cor) > 0.5)
cat("\nPairs with Pearson Correlation > 0.5:", corr_0.5, "\n")

# Are there any pairs that have a PCC correlation > |0.5| and a p-value < 0.05?
pairs_of_interest_pcc <- combined_data_pcc %>%
  filter(abs(PCC.cor) > 0.5 & p_value < 0.05 )
cat("PCC correlation > |0.5| and a p-value < 0.05:", nrow(pairs_of_interest_pcc), "\n")

# How many unique miRNAs and mRNAs have a PCC correlation > |0.5| and a p-value < 0.05?
length(unique(pairs_of_interest_pcc$miRNA))
length(unique(pairs_of_interest_pcc$mRNA))

# Save pairs of interest df as csv 
write.csv(pairs_of_interest_pcc, "../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-miranda_PCC_sig_miRNA_mRNA.csv")
```

I noticed that the significant pvalues correspond to high correlation values, whereas low correlation values are typically not significant (if using the p<0.05). 

Read data back in so PCC does not have to be run again
```{r}
pairs_of_interest_pcc <- read.csv("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-miranda_PCC_sig_miRNA_mRNA.csv")
```

Plot number of SIGNIFICANT positive and negative interactions for each miRNA
```{r}
# Summarize the data
summary_data <- pairs_of_interest_pcc %>%
  group_by(miRNA) %>%
  summarise(
    positive_count = sum(PCC.cor > 0),
    negative_count = sum(PCC.cor < 0)
  ) %>%
  mutate(total_count = positive_count + negative_count) %>%
  arrange(desc(total_count))
sum(summary_data$positive_count)
sum(summary_data$negative_count)

# Assess how many miRNAs have more positive correlations vs more negative ones 
positive_dominant <- sum(summary_data$positive_count > summary_data$negative_count)
negative_dominant <- sum(summary_data$negative_count > summary_data$positive_count)
equal <- sum(summary_data$positive_count == summary_data$negative_count)

cat("miRNAs with more positive correlations:", positive_dominant, "\n")
cat("miRNAs with more negative correlations:", negative_dominant, "\n")
cat("miRNAs with equal positive and negative correlations:", equal, "\n")

# Reshape the data for plotting
plot_data <- summary_data %>%
  tidyr::pivot_longer(cols = c(positive_count, negative_count),
                      names_to = "correlation_type",
                      values_to = "count")

# Create the stacked bar plot
ggplot(plot_data, aes(x = reorder(miRNA, -total_count), y = count, fill = correlation_type)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = "miRNA", y = "Count", fill = "Correlation Type",
       title = "Significant Positive and Negative PCC Correlations per miRNA")
ggsave("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-miRNA_significant_positive_negative_correlations.png", last_plot(), width = 12, height = 10, dpi = 300)
ggsave("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-miRNA_significant_positive_negative_correlations.pdf", last_plot(), width = 12, height = 10, dpi = 300)
```

Plot using igraph 
```{r}
# Create the graph
g <- graph_from_data_frame(pairs_of_interest_pcc, directed = FALSE)

# Add edge attributes
E(g)$weight <- abs(E(g)$PCC.cor)  # Use absolute PCC for edge weight
E(g)$color <- ifelse(E(g)$PCC.cor > 0, "blue", "red")  # Blue for positive, red for negative correlations

# Add node attributes
V(g)$type <- ifelse(V(g)$name %in% pairs_of_interest_pcc$miRNA, "miRNA", "mRNA")

# Convert to tbl_graph for ggraph
g_tbl <- as_tbl_graph(g)

# Create the plot
p <- ggraph(g_tbl, layout = "fr") +
  geom_edge_link(aes(edge_width = weight, color = color), alpha = 0.6) +
  geom_node_point(aes(color = type), size = 5) +
  geom_node_text(aes(label = name), repel = TRUE, size = 3) +
  scale_edge_width(range = c(0.5, 3)) +
  scale_color_manual(values = c("miRNA" = "lightblue", "mRNA" = "lightgreen", "Positive correlation" = "blue", "Negative correlation" = "red")) +
  theme_graph() +
  labs(title = "miRNA-mRNA Interaction Network",
       subtitle = "Edge width represents |PCC|, color represents correlation direction");p

ggsave("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-significant_miRNA_mRNA_network.png", p, width = 12, height = 10, dpi = 300)
```


Super cool! 

```{r}
# Function to create a scatter plot for a miRNA-mRNA pair
plot_correlation <- function(miRNA, mRNA, miRNA_data, mRNA_data, cor_value, p_value) {
  # Extract data for the specific miRNA and mRNA
  miRNA_counts <- as.numeric(miRNA_data[miRNA,])
  mRNA_counts <- as.numeric(mRNA_data[mRNA,])
  
  # Combine into a data frame
  plot_data <- data.frame(
    miRNA_counts = miRNA_counts,
    mRNA_counts = mRNA_counts,
    sample = colnames(miRNA_data)
  )
  
  # Create the plot
  p <- ggplot(plot_data, aes(x = miRNA_counts, y = mRNA_counts)) +
    geom_point(aes(color = sample), size = 3) +
    geom_smooth(method = "lm", se = TRUE, color = "red") +
    scale_x_log10() +
    scale_y_log10() +
    labs(title = paste(miRNA, "vs", mRNA),
         subtitle = paste("Correlation:", round(cor_value, 3), 
                          "| p-value:", format.pval(p_value, digits = 3)),
         x = paste(miRNA, "counts"),
         y = paste(mRNA, "counts")) +
    theme_minimal()
  
  return(p)
}

# Create plots for pairs in pairs_of_interest_pcc
plots <- lapply(1:nrow(pairs_of_interest_pcc), function(i) {
  miRNA <- pairs_of_interest_pcc$miRNA[i]
  mRNA <- pairs_of_interest_pcc$mRNA[i]
  cor_value <- pairs_of_interest_pcc$PCC.cor[i]
  p_value <- pairs_of_interest_pcc$p_value[i]
  plot_correlation(miRNA, mRNA, miRNA_norm, mRNA_norm, cor_value, p_value)
})

# Arrange and display the plots
# If you have many plots, you might want to adjust the layout
n_plots <- length(plots)
n_cols <- min(3, n_plots)  # Adjust the number of columns as needed
n_rows <- ceiling(n_plots / n_cols)

#grid.arrange(grobs = plots, ncol = n_cols, nrow = n_rows)

# Save plots to a PDF
pdf("../../D-Apul/output/09-Apul-mRNA-miRNA-interactions/Apul-correlation_plots.pdf", width = 5 * n_cols, height = 5 * n_rows)
grid.arrange(grobs = plots, ncol = n_cols, nrow = n_rows)
dev.off()
```

Need to discuss outliers with group, as there are some correlations that are being driven by outliers. Sample 140 looks like it is very different from the others though in the correlation plots. 

Next steps: 

- Think more about counts normalization
- Distance vs pearson's correlation