11-miRNA-lncRNA-summaries ================ Kathleen Durkin 2025-10-09 - 0.1 Apul - 0.2 Peve - 0.3 Ptuh Load packages ``` r library(dplyr) ``` ## ## Attaching package: 'dplyr' ## The following objects are masked from 'package:stats': ## ## filter, lag ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union ``` r library(readr) library(stringr) library(tidyr) ``` ## 0.1 Apul Reminder that miRanda output files are formatted as such: “miRNA”, “lncRNA”, “score”, “energy”, “query_start_end”, “subject_start_end”, “total_bp_shared”, “query_similar”, “subject_similar” Jill has previously suggested filtering miRNA-lncRNA binding results to retain only instances in which at least 75% of the miRNA query is complementary, so I will include this as a filter option in the summary stats below (query_similar \> 75%). Note, however, that the query may not be the entire miRNA; it is the portion of the miRNA that is predicted to bind. So, for example, the query may be positions 2 to 10 on the miRNA (likely the seed) and have 100% query similarity. This would indicate that those 9 nucleotides of the miRNA are perfectly complementary to a set of 9 nucleotides in the lncRNA, NOT that the entire \~22nt long miRNA is perfectly complementary. For now, though, I’ll include filters for just “query_similar” because that’s what Jill used. We shouldn’t need to use that filter in final results, because the join of predicted binding + coexpression is already a sufficient reduction of network size. 
``` r apul_miranda <- "../../D-Apul/output/17-Apul-miRNA-lncRNA-BLASTs-RNAhybrid/Apul-miRanda-lncRNA-strict-parsed.txt" apul_pcc_link <- "https://gannet.fish.washington.edu/kdurkin1/ravenbackups/deep-dive-expression/D-Apul/output/28-Apul-miRNA-lncRNA-interactions/Apul-PCC_miRNA_lncRNA.csv" apul_pcc_loc <- "../../D-Apul/output/28-Apul-miRNA-lncRNA-interactions/Apul-PCC_miRNA_lncRNA.csv" apul_miranda_pcc <- "../../D-Apul/output/28-Apul-miRNA-lncRNA-interactions/miranda_PCC_miRNA_lncRNA.csv" # ---- Download the PCC file if no local copy exists ---- if (!file.exists(apul_pcc_loc)) { download.file(apul_pcc_link, apul_pcc_loc) } # ---- miRanda binding interactions: count lines that start with >Cluster ---- cat("\nNum. of miRNA-lncRNA binding interactions (miRanda):\n") ``` ## ## Num. of miRNA-lncRNA binding interactions (miRanda): ``` r num_clusters <- sum(grepl("^>Cluster", readLines(apul_miranda))) print(num_clusters) ``` ## [1] 12390 ``` r cat("\nNum. of miRNA-lncRNA bindings >75% (miRanda):\n") ``` ## ## Num. of miRNA-lncRNA bindings >75% (miRanda): ``` r miranda_df <- read.table(apul_miranda, header = FALSE, sep = "\t", quote = "", stringsAsFactors = FALSE) miranda_df$V8 <- as.numeric(str_remove(miranda_df$V8, "%")) num_75 <- sum(miranda_df$V8 >= 75, na.rm = TRUE) print(num_75) ``` ## [1] 6620 ``` r # ---- Significant PCC correlations (by position: column 4 is used as the PCC, column 5 as the p-value) ---- pcc_df <- read_csv(apul_pcc_loc, show_col_types = FALSE) ``` ## New names: ## • `` -> `...1` ``` r count_sig <- sum(pcc_df[[5]] < 0.05, na.rm = TRUE) count_sig_pos <- sum(pcc_df[[5]] < 0.05 & pcc_df[[4]] > 0, na.rm = TRUE) percent <- if (count_sig > 0) round((count_sig_pos / count_sig) * 100, 2) else 0 cat("\nSignificant PCC correlations + % positive\n") ``` ## ## Significant PCC correlations + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig, "\n") ``` ## Significant PCC (p<0.05): 62486 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos, "\n") ``` ## Significant (p<0.05), pos. 
PCC: 47187 ``` r cat("Percent positive: ", percent, "%\n") ``` ## Percent positive: 75.52 % ``` r # ---- Overlap (binding AND sig. PCC) ---- miranda_pcc <- read_csv(apul_miranda_pcc, show_col_types = FALSE) ``` ## New names: ## • `` -> `...1` ``` r # Clean columns 9, 12, 13 (remove quotes/percent) miranda_pcc <- miranda_pcc %>% mutate( across(c(9, 12, 13), ~ as.numeric(str_remove_all(as.character(.x), '["%]'))) ) count_sig2 <- sum(miranda_pcc[[13]] < 0.05, na.rm = TRUE) count_sig_pos2 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[12]] > 0, na.rm = TRUE) percent2 <- if (count_sig2 > 0) round((count_sig_pos2 / count_sig2) * 100, 2) else 0 cat("\nOverlap (binding AND sig. PCC) + % positive\n") ``` ## ## Overlap (binding AND sig. PCC) + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig2, "\n") ``` ## Significant PCC (p<0.05): 564 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos2, "\n") ``` ## Significant (p<0.05), pos. PCC: 433 ``` r cat("Percent positive: ", percent2, "%\n") ``` ## Percent positive: 76.77 % ``` r # ---- Overlap >75% (binding >75% AND PCC). NOTE(review): strict > 75 on column 9 here, but the miRanda-only count above uses >= 75 on V8 -- confirm which threshold is intended ---- count_sig3 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[9]] > 75, na.rm = TRUE) count_sig_pos3 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[12]] > 0 & miranda_pcc[[9]] > 75, na.rm = TRUE) percent3 <- if (count_sig3 > 0) round((count_sig_pos3 / count_sig3) * 100, 2) else 0 cat("\nOverlap >75% (binding >75% AND PCC) + % positive\n") ``` ## ## Overlap >75% (binding >75% AND PCC) + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig3, "\n") ``` ## Significant PCC (p<0.05): 268 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos3, "\n") ``` ## Significant (p<0.05), pos. 
PCC: 191 ``` r cat("Percent positive: ", percent3, "%\n") ``` ## Percent positive: 71.27 % ## 0.2 Peve ``` r peve_miranda <- "../../E-Peve/output/14-Peve-miRNA-lncRNA-BLASTs-miRanda/Peve-miRanda-lncRNA-strict-parsed.txt" peve_pcc_link <- "https://gannet.fish.washington.edu/kdurkin1/ravenbackups/deep-dive-expression/E-Peve/output/15-Peve-miRNA-lncRNA-PCC/PCC_miRNA_lncRNA.csv" peve_pcc_loc <- "../../E-Peve/output/15-Peve-miRNA-lncRNA-PCC/PCC_miRNA_lncRNA.csv" peve_miranda_pcc <- "../../E-Peve/output/15-Peve-miRNA-lncRNA-PCC/miranda_PCC_miRNA_lncRNA.csv" # ---- Download the PCC file if no local copy exists ---- if (!file.exists(peve_pcc_loc)) { download.file(peve_pcc_link, peve_pcc_loc) } # ---- miRanda binding interactions: count lines that start with >Cluster ---- cat("\nNum. of miRNA-lncRNA binding interactions (miRanda):\n") ``` ## ## Num. of miRNA-lncRNA binding interactions (miRanda): ``` r num_clusters <- sum(grepl("^>Cluster", readLines(peve_miranda))) print(num_clusters) ``` ## [1] 4116 ``` r cat("\nNum. of miRNA-lncRNA bindings >75% (miRanda):\n") ``` ## ## Num. of miRNA-lncRNA bindings >75% (miRanda): ``` r miranda_df <- read.table(peve_miranda, header = FALSE, sep = "\t", quote = "", stringsAsFactors = FALSE) miranda_df$V8 <- as.numeric(str_remove(miranda_df$V8, "%")) num_75 <- sum(miranda_df$V8 >= 75, na.rm = TRUE) print(num_75) ``` ## [1] 2111 ``` r # ---- Significant PCC correlations (by position: column 4 is used as the PCC, column 5 as the p-value) ---- pcc_df <- read_csv(peve_pcc_loc, show_col_types = FALSE) ``` ## New names: ## • `` -> `...1` ``` r count_sig <- sum(pcc_df[[5]] < 0.05, na.rm = TRUE) count_sig_pos <- sum(pcc_df[[5]] < 0.05 & pcc_df[[4]] > 0, na.rm = TRUE) percent <- if (count_sig > 0) round((count_sig_pos / count_sig) * 100, 2) else 0 cat("\nSignificant PCC correlations + % positive\n") ``` ## ## Significant PCC correlations + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig, "\n") ``` ## Significant PCC (p<0.05): 18565 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos, "\n") ``` ## Significant (p<0.05), pos. 
PCC: 9835 ``` r cat("Percent positive: ", percent, "%\n") ``` ## Percent positive: 52.98 % ``` r # ---- Overlap (binding AND sig. PCC) ---- miranda_pcc <- read_csv(peve_miranda_pcc, show_col_types = FALSE) ``` ## New names: ## • `` -> `...1` ``` r # Clean columns 9, 12, 13 (remove quotes/percent) miranda_pcc <- miranda_pcc %>% mutate( across(c(9, 12, 13), ~ as.numeric(str_remove_all(as.character(.x), '["%]'))) ) count_sig2 <- sum(miranda_pcc[[13]] < 0.05, na.rm = TRUE) count_sig_pos2 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[12]] > 0, na.rm = TRUE) percent2 <- if (count_sig2 > 0) round((count_sig_pos2 / count_sig2) * 100, 2) else 0 cat("\nOverlap (binding AND sig. PCC) + % positive\n") ``` ## ## Overlap (binding AND sig. PCC) + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig2, "\n") ``` ## Significant PCC (p<0.05): 175 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos2, "\n") ``` ## Significant (p<0.05), pos. PCC: 106 ``` r cat("Percent positive: ", percent2, "%\n") ``` ## Percent positive: 60.57 % ``` r # ---- Overlap >75% (binding >75% AND PCC). NOTE(review): strict > 75 on column 9 here, but the miRanda-only count above uses >= 75 on V8 -- confirm which threshold is intended ---- count_sig3 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[9]] > 75, na.rm = TRUE) count_sig_pos3 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[12]] > 0 & miranda_pcc[[9]] > 75, na.rm = TRUE) percent3 <- if (count_sig3 > 0) round((count_sig_pos3 / count_sig3) * 100, 2) else 0 cat("\nOverlap >75% (binding >75% AND PCC) + % positive\n") ``` ## ## Overlap >75% (binding >75% AND PCC) + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig3, "\n") ``` ## Significant PCC (p<0.05): 86 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos3, "\n") ``` ## Significant (p<0.05), pos. 
PCC: 44 ``` r cat("Percent positive: ", percent3, "%\n") ``` ## Percent positive: 51.16 % ## 0.3 Ptuh ``` r ptuh_miranda <- "../../F-Ptuh/output/14-Ptuh-miRNA-lncRNA-BLASTs-miRanda/Ptuh-miRanda-lncRNA-strict-parsed.txt" ptuh_pcc_link <- "https://gannet.fish.washington.edu/kdurkin1/ravenbackups/deep-dive-expression/F-Ptuh/output/15-Ptuh-miRNA-lncRNA-PCC/PCC_miRNA_lncRNA.csv" ptuh_pcc_loc <- "../../F-Ptuh/output/15-Ptuh-miRNA-lncRNA-PCC/PCC_miRNA_lncRNA.csv" ptuh_miranda_pcc <- "../../F-Ptuh/output/15-Ptuh-miRNA-lncRNA-PCC/miranda_PCC_miRNA_lncRNA.csv" # ---- Download the PCC file if no local copy exists ---- if (!file.exists(ptuh_pcc_loc)) { download.file(ptuh_pcc_link, ptuh_pcc_loc) } # ---- File line counts ---- cat("Num. of lines in miranda_PCC file:\n") ``` ## Num. of lines in miranda_PCC file: ``` r print(length(readLines(ptuh_miranda_pcc))) ``` ## [1] 8342 ``` r # ---- miRanda binding interactions: count lines that start with >Cluster ---- cat("\nNum. of miRNA-lncRNA binding interactions (miRanda):\n") ``` ## ## Num. of miRNA-lncRNA binding interactions (miRanda): ``` r num_clusters <- sum(grepl("^>Cluster", readLines(ptuh_miranda))) print(num_clusters) ``` ## [1] 8341 ``` r cat("\nNum. of miRNA-lncRNA bindings >75% (miRanda):\n") ``` ## ## Num. 
of miRNA-lncRNA bindings >75% (miRanda): ``` r miranda_df <- read.table(ptuh_miranda, header = FALSE, sep = "\t", quote = "", stringsAsFactors = FALSE) miranda_df$V8 <- as.numeric(str_remove(miranda_df$V8, "%")) num_75 <- sum(miranda_df$V8 >= 75, na.rm = TRUE) print(num_75) ``` ## [1] 4246 ``` r # ---- Significant PCC correlations (by position: column 4 is used as the PCC, column 5 as the p-value) ---- pcc_df <- read_csv(ptuh_pcc_loc, show_col_types = FALSE) ``` ## New names: ## • `` -> `...1` ``` r count_sig <- sum(pcc_df[[5]] < 0.05, na.rm = TRUE) count_sig_pos <- sum(pcc_df[[5]] < 0.05 & pcc_df[[4]] > 0, na.rm = TRUE) percent <- if (count_sig > 0) round((count_sig_pos / count_sig) * 100, 2) else 0 cat("\nSignificant PCC correlations + % positive\n") ``` ## ## Significant PCC correlations + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig, "\n") ``` ## Significant PCC (p<0.05): 41931 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos, "\n") ``` ## Significant (p<0.05), pos. PCC: 28344 ``` r cat("Percent positive: ", percent, "%\n") ``` ## Percent positive: 67.6 % ``` r # ---- Overlap (binding AND sig. PCC) ---- miranda_pcc <- read_csv(ptuh_miranda_pcc, show_col_types = FALSE) ``` ## New names: ## • `` -> `...1` ``` r # Clean columns 9, 12, 13 (remove quotes/percent) miranda_pcc <- miranda_pcc %>% mutate( across(c(9, 12, 13), ~ as.numeric(str_remove_all(as.character(.x), '["%]'))) ) count_sig2 <- sum(miranda_pcc[[13]] < 0.05, na.rm = TRUE) count_sig_pos2 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[12]] > 0, na.rm = TRUE) percent2 <- if (count_sig2 > 0) round((count_sig_pos2 / count_sig2) * 100, 2) else 0 cat("\nOverlap (binding AND sig. PCC) + % positive\n") ``` ## ## Overlap (binding AND sig. PCC) + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig2, "\n") ``` ## Significant PCC (p<0.05): 564 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos2, "\n") ``` ## Significant (p<0.05), pos. 
PCC: 311 ``` r cat("Percent positive: ", percent2, "%\n") ``` ## Percent positive: 55.14 % ``` r # ---- Overlap >75% (binding >75% AND PCC). NOTE(review): strict > 75 on column 9 here, but the miRanda-only count above uses >= 75 on V8 -- confirm which threshold is intended ---- count_sig3 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[9]] > 75, na.rm = TRUE) count_sig_pos3 <- sum(miranda_pcc[[13]] < 0.05 & miranda_pcc[[12]] > 0 & miranda_pcc[[9]] > 75, na.rm = TRUE) percent3 <- if (count_sig3 > 0) round((count_sig_pos3 / count_sig3) * 100, 2) else 0 cat("\nOverlap >75% (binding >75% AND PCC) + % positive\n") ``` ## ## Overlap >75% (binding >75% AND PCC) + % positive ``` r cat("Significant PCC (p<0.05): ", count_sig3, "\n") ``` ## Significant PCC (p<0.05): 216 ``` r cat("Significant (p<0.05), pos. PCC: ", count_sig_pos3, "\n") ``` ## Significant (p<0.05), pos. PCC: 130 ``` r cat("Percent positive: ", percent3, "%\n") ``` ## Percent positive: 60.19 %