---
title: "03-expression-matrix-correlations"
format: html
editor: visual
---

lncRNA–mRNA Correlation Pipeline
Description: Complete, reproducible workflow that normalises lncRNA and mRNA count
            matrices with DESeq2 and calculates pair‑wise Pearson correlations                  between every lncRNA–mRNA pair.
Expected input files (tab‑ or comma‑separated):
• lncrna_counts.tsv – genes/lncRNAs in rows, samples in columns
• mrna_counts.tsv   – mRNAs in rows,        samples in identical columns/order
Output files:
• correlations_full.tsv  – matrix of r values (lncRNAs × mRNAs)
• correlations_padj.tsv  – matrix of Benjamini‑Hochberg adjusted p‑values
• significant_pairs.tsv  – table of pairs passing |r| >= 0.7 & padj <= 0.05
The code uses block processing so memory stays < ~4 GB even for large datasets.

**Update:** This version auto‑handles the specific formats you just uploaded:
* `lncRNA_counts.txt` – featureCounts output (6 metadata cols + raw counts)
* `mRNA_counts.txt`   – Kallisto counts table (transcript IDs + numeric counts)

We strip metadata columns, harmonise sample names (e.g. *sample140*), round the
Kallisto numeric counts to integers for DESeq2, then run the same DESeq2 → VST
 → pair‑wise Pearson correlation workflow as before.

# Apul
## 1  Libraries
```{r}
library(data.table)
library(DESeq2)
library(WGCNA)      # p-values + block-wise tools
if (!requireNamespace("bigmemory", quietly = TRUE)) install.packages("bigmemory")
library(bigmemory)  # RAM-friendly matrices
library(dplyr)
```

## 2  Read count matrices & tidy formatting
```{r}
# ---------- 2.0  Load the raw tables ---------------------------------------
lnc_raw  <- fread(cmd = "grep -v '^#' ~/github/deep-dive-expression/M-multi-species/data/03-expression-matrix-correlations/Apul/lncRNA_counts.txt")
mrna_raw <- fread("~/github/deep-dive-expression/M-multi-species/data/03-expression-matrix-correlations/Apul/mRNA_counts.txt")

# ---------- 2.1  Quick sanity counts *before* you touch rownames() ----------
cat("\nlnc_raw rows :", nrow(lnc_raw),
    "| unique Geneid :", length(unique(lnc_raw$Geneid)),
    "| blanks :", sum(lnc_raw$Geneid == ""), "\n")

cat("mrna_raw rows:", nrow(mrna_raw),
    "| unique IDs   :", length(unique(mrna_raw[[1]])),
    "| blanks :", sum(mrna_raw[[1]] == ""), "\n\n")

# ---------- 2.2  Drop blank IDs (if any) and make duplicates unique ---------
lnc_raw  <- lnc_raw [Geneid != ""]
mrna_raw <- mrna_raw[mrna_raw[[1]] != ""]

lnc_ids  <- make.unique(lnc_raw$Geneid)
mrna_ids <- make.unique(mrna_raw[[1]])

# ---------- 2.3  Build matrices with row-names that now *stick* -------------
lnc_mat <- as.matrix(as.data.frame(lnc_raw[, -(1:6)], row.names = lnc_ids))
mrna_mat <- as.matrix(as.data.frame(mrna_raw[, -1], row.names = mrna_ids))

# ---------- 2.4  Harmonise column (sample) names ----------------------------
clean_lnc <- function(x) sub(".*RNA-ACR-([0-9]+).*", "sample\\1", x)
colnames(lnc_mat)  <- clean_lnc(colnames(lnc_mat))
colnames(mrna_mat) <- sub("^kallisto_quant_", "", colnames(mrna_mat))
colnames(mrna_mat) <- sub("\\.\\d+$", "", colnames(mrna_mat))  # strip ".1"

# ---------- 2.5  Round Kallisto counts to integers -------------------------
mrna_mat <- round(mrna_mat)

# ---------- 2.6  Assign back to the workflow objects ------------------------
lnc_counts  <- lnc_mat
mrna_counts <- mrna_mat

head(lnc_counts)
head(mrna_counts)
```

### 2.1  Match samples across matrices
```{r}
common <- intersect(colnames(lnc_counts), colnames(mrna_counts))
stopifnot(length(common) >= 2)
lnc_counts  <- lnc_counts [, common]
mrna_counts <- mrna_counts[, common]
```

## 3  Filter low-expressed genes
```{r}
min_samples <- 3
min_counts  <- 10
keep_lnc  <- rowSums(lnc_counts  >= min_counts) >= min_samples
keep_mrna <- rowSums(mrna_counts >= min_counts) >= min_samples
lnc_counts  <- lnc_counts [keep_lnc , ]
mrna_counts <- mrna_counts[keep_mrna, ]
```

## 4  Joint DESeq2 normalisation & variance-stabilising transform
```{r}
stopifnot(nrow(lnc_counts) == length(rownames(lnc_counts)))
stopifnot(nrow(mrna_counts) == length(rownames(mrna_counts)))

combined <- rbind(lnc_counts, mrna_counts)
dds <- DESeqDataSetFromMatrix(countData = combined,
                              colData = data.frame(cond = factor(rep("all", ncol(combined))),
                     row.names = colnames(combined)),
                              design    = ~ 1)

dds <- estimateSizeFactors(dds)
vst_mat <- assay(vst(dds, blind = TRUE))

lnc_vst  <- vst_mat[rownames(lnc_counts) , ]
mrna_vst <- vst_mat[rownames(mrna_counts), ]
```

## 5  Block-wise Pearson correlations
```{r}
block_size <- 2000  # tweak for RAM
nr_lnc  <- nrow(lnc_vst)
nr_mrna <- nrow(mrna_vst)
cor_mat  <- big.matrix(nrow = nr_lnc, ncol = nr_mrna, type = "double")
cor_padj <- big.matrix(nrow = nr_lnc, ncol = nr_mrna, type = "double")

for (start in seq(1, nr_lnc, by = block_size)) {
  end <- min(start + block_size - 1, nr_lnc)
  rblk <- cor(t(lnc_vst[start:end, ]), t(mrna_vst), method = "pearson")
  pblk <- corPvalueStudent(rblk, nSamples = ncol(lnc_vst))
  cor_mat [start:end, ] <- rblk
  cor_padj[start:end, ] <- p.adjust(pblk, method = "BH")
  cat(sprintf("processed %d–%d\n", start, end))
}

options(bigmemory.allow.dimnames = TRUE)
dimnames(cor_mat)  <- list(rownames(lnc_vst),  rownames(mrna_vst))
dimnames(cor_padj) <- dimnames(cor_mat)
```

## 6  Export results
```{r}
# full matrices (beware large size!)
fwrite(as.data.table(as.matrix(cor_mat),  keep.rownames = "lncRNA"), "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/correlations_full.tsv")
fwrite(as.data.table(as.matrix(cor_padj), keep.rownames = "lncRNA"), "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/correlations_padj.tsv")

# significant pairs (|r| ≥ 0.7 & padj ≤ 0.05)
sig <- which(abs(cor_mat[]) >= 0.7 & cor_padj[] <= 0.05, arr.ind = TRUE) |> as.data.frame()
colnames(sig) <- c("lnc_idx", "mrna_idx")
sig <- sig |>
  mutate(lncRNA = rownames(cor_mat)[lnc_idx],
         mRNA   = colnames(cor_mat)[mrna_idx],
         r      = cor_mat [cbind(lnc_idx, mrna_idx)],
         padj   = cor_padj[cbind(lnc_idx, mrna_idx)]) |>
  select(lncRNA, mRNA, r, padj)

fwrite(sig, "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/significant_pairs.tsv")
```

## 7  Session info
```{r}
sessionInfo()
```

## 8 Visualization
```{r}
library(ggplot2)
library(reshape2)
library(tidyr)

# 1. Take top 20 by lowest padj
top_sig <- sig |> arrange(padj) |> head(20)

# 2. Create correlation matrix (wide form)
wide_df <- top_sig |> arrange(padj) |> head(20) |>
  select(lncRNA, mRNA, r) |>
  pivot_wider(names_from = mRNA, values_from = r)

heat_df <- as.data.frame(wide_df)
rownames(heat_df) <- heat_df$lncRNA
heat_df <- heat_df[, -1]

# 3. Melt into long format
heat_long <- melt(as.matrix(heat_df), varnames = c("lncRNA", "mRNA"), value.name = "correlation")

# 4. Plot
ggplot(heat_long, aes(x = mRNA, y = lncRNA, fill = correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0,
                       limits = c(-1, 1), name = "Pearson r") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.grid = element_blank()) +
  labs(title = "Top 20 lncRNA–mRNA Correlations",
       x = "mRNA", y = "lncRNA")

```

# Peve
## 1  Libraries
```{r}
library(data.table)
library(DESeq2)
library(WGCNA)      # p-values + block-wise tools
if (!requireNamespace("bigmemory", quietly = TRUE)) install.packages("bigmemory")
library(bigmemory)  # RAM-friendly matrices
library(dplyr)
```

## 2  Read count matrices & tidy formatting
```{bash}
#download mRNA matrix
curl -o ~/github/deep-dive-expression/M-multi-species/data/03-expression-matrix-correlations/Peve/mRNA_counts.txt https://raw.githubusercontent.com/urol-e5/deep-dive/main/E-Peve/output/12-Peve-RNAseq-kallisto/kallisto/kallisto.isoform.counts.matrix
```

```{r}
# ---------- 2.0  Load the raw tables ---------------------------------------
lnc_raw  <- fread(cmd = "grep -v '^#' ~/github/deep-dive-expression/M-multi-species/data/03-expression-matrix-correlations/Peve/lncRNA_counts.txt")
mrna_raw <- fread("~/github/deep-dive-expression/M-multi-species/data/03-expression-matrix-correlations/Peve/mRNA_counts.txt")

# ---------- 2.1  Quick sanity counts *before* you touch rownames() ----------
cat("\nlnc_raw rows :", nrow(lnc_raw),
    "| unique Geneid :", length(unique(lnc_raw$Geneid)),
    "| blanks :", sum(lnc_raw$Geneid == ""), "\n")

cat("mrna_raw rows:", nrow(mrna_raw),
    "| unique IDs   :", length(unique(mrna_raw[[1]])),
    "| blanks :", sum(mrna_raw[[1]] == ""), "\n\n")

# ---------- 2.2  Drop blank IDs (if any) and make duplicates unique ---------
lnc_raw  <- lnc_raw [Geneid != ""]
mrna_raw <- mrna_raw[mrna_raw[[1]] != ""]

lnc_ids  <- make.unique(lnc_raw$Geneid)
mrna_ids <- make.unique(mrna_raw[[1]])

# ---------- 2.3  Build matrices with row-names that now *stick* -------------
lnc_mat <- as.matrix(as.data.frame(lnc_raw[, -(1:6)], row.names = lnc_ids))
mrna_mat <- as.matrix(as.data.frame(mrna_raw[, -1], row.names = mrna_ids))

# ---------- 2.4  Harmonise column (sample) names ----------------------------
# This matches RNA-POR-71-S1-TP2.sorted.bam and extracts "71"
clean_lnc <- function(x) sub(".*RNA-POR-([0-9]+)-.*", "sample\\1", x)
colnames(lnc_mat)  <- clean_lnc(colnames(lnc_mat))
colnames(mrna_mat) <- sub("^kallisto_quant_", "", colnames(mrna_mat))
colnames(mrna_mat) <- sub("\\.\\d+$", "", colnames(mrna_mat))

common <- intersect(colnames(lnc_mat), colnames(mrna_mat))
length(common)  # Should now be >= 2

# ---------- 2.5  Round Kallisto counts to integers -------------------------
mrna_mat <- round(mrna_mat)

# ---------- 2.6  Assign back to the workflow objects ------------------------
lnc_counts  <- lnc_mat
mrna_counts <- mrna_mat

head(lnc_counts)
head(mrna_counts)
```

### 2.1  Match samples across matrices
```{r}
common <- intersect(colnames(lnc_counts), colnames(mrna_counts))
stopifnot(length(common) >= 2)
lnc_counts  <- lnc_counts [, common]
mrna_counts <- mrna_counts[, common]
```

## 3  Filter low-expressed genes
```{r}
min_samples <- 3
min_counts  <- 10
keep_lnc  <- rowSums(lnc_counts  >= min_counts) >= min_samples
keep_mrna <- rowSums(mrna_counts >= min_counts) >= min_samples
lnc_counts  <- lnc_counts [keep_lnc , ]
mrna_counts <- mrna_counts[keep_mrna, ]
```

## 4  Joint DESeq2 normalisation & variance-stabilising transform
```{r}
stopifnot(nrow(lnc_counts) == length(rownames(lnc_counts)))
stopifnot(nrow(mrna_counts) == length(rownames(mrna_counts)))

combined <- rbind(lnc_counts, mrna_counts)
dds <- DESeqDataSetFromMatrix(countData = combined,
                              colData = data.frame(cond = factor(rep("all", ncol(combined))),
                     row.names = colnames(combined)),
                              design    = ~ 1)

dds <- estimateSizeFactors(dds)
vst_mat <- assay(vst(dds, blind = TRUE))

lnc_vst  <- vst_mat[rownames(lnc_counts) , ]
mrna_vst <- vst_mat[rownames(mrna_counts), ]
```

## 5  Block-wise Pearson correlations
```{r}
block_size <- 2000  # tweak for RAM
nr_lnc  <- nrow(lnc_vst)
nr_mrna <- nrow(mrna_vst)
cor_mat  <- big.matrix(nrow = nr_lnc, ncol = nr_mrna, type = "double")
cor_padj <- big.matrix(nrow = nr_lnc, ncol = nr_mrna, type = "double")

for (start in seq(1, nr_lnc, by = block_size)) {
  end <- min(start + block_size - 1, nr_lnc)
  rblk <- cor(t(lnc_vst[start:end, ]), t(mrna_vst), method = "pearson")
  pblk <- corPvalueStudent(rblk, nSamples = ncol(lnc_vst))
  cor_mat [start:end, ] <- rblk
  cor_padj[start:end, ] <- p.adjust(pblk, method = "BH")
  cat(sprintf("processed %d–%d\n", start, end))
}

options(bigmemory.allow.dimnames = TRUE)
dimnames(cor_mat)  <- list(rownames(lnc_vst),  rownames(mrna_vst))
dimnames(cor_padj) <- dimnames(cor_mat)
```

## 6  Export results
```{r}
# full matrices (beware large size!)
fwrite(as.data.table(as.matrix(cor_mat),  keep.rownames = "lncRNA"), "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/Peve/correlations_full.tsv")
fwrite(as.data.table(as.matrix(cor_padj), keep.rownames = "lncRNA"), "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/Peve/correlations_padj.tsv")

# significant pairs (|r| ≥ 0.7 & padj ≤ 0.05)
sig <- which(abs(cor_mat[]) >= 0.7 & cor_padj[] <= 0.05, arr.ind = TRUE) |> as.data.frame()
colnames(sig) <- c("lnc_idx", "mrna_idx")
sig <- sig |>
  mutate(lncRNA = rownames(cor_mat)[lnc_idx],
         mRNA   = colnames(cor_mat)[mrna_idx],
         r      = cor_mat [cbind(lnc_idx, mrna_idx)],
         padj   = cor_padj[cbind(lnc_idx, mrna_idx)]) |>
  select(lncRNA, mRNA, r, padj)

fwrite(sig, "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/Peve/significant_pairs.tsv")
```

## 7  Session info
```{r}
sessionInfo()
```

## 8 Visualization
```{r}
library(ggplot2)
library(reshape2)
library(tidyr)

# 1. Take top 20 by lowest padj
top_sig <- sig |> arrange(padj) |> head(20)

# 2. Create correlation matrix (wide form)
wide_df <- top_sig |> arrange(padj) |> head(20) |>
  select(lncRNA, mRNA, r) |>
  pivot_wider(names_from = mRNA, values_from = r)

heat_df <- as.data.frame(wide_df)
rownames(heat_df) <- heat_df$lncRNA
heat_df <- heat_df[, -1]

# 3. Melt into long format
heat_long <- melt(as.matrix(heat_df), varnames = c("lncRNA", "mRNA"), value.name = "correlation")

# 4. Plot
ggplot(heat_long, aes(x = mRNA, y = lncRNA, fill = correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0,
                       limits = c(-1, 1), name = "Pearson r") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.grid = element_blank()) +
  labs(title = "Top lncRNA–mRNA Correlations",
       x = "mRNA", y = "lncRNA")

```

# Ptuh
## 1  Libraries
```{r}
library(data.table)
library(DESeq2)
library(WGCNA)      # p-values + block-wise tools
if (!requireNamespace("bigmemory", quietly = TRUE)) install.packages("bigmemory")
library(bigmemory)  # RAM-friendly matrices
library(dplyr)
```

## 2  Read count matrices & tidy formatting
```{bash}
#download mRNA matrix
curl -o ~/github/deep-dive-expression/M-multi-species/data/03-expression-matrix-correlations/Ptuh/mRNA_counts.txt https://raw.githubusercontent.com/urol-e5/deep-dive/main/F-Pmea/output/14-Pmea-RNAseq-kallisto/kallisto/kallisto.isoform.counts.matrix

```

```{r}
# ---------- 2.0  Load the raw tables ---------------------------------------
lnc_raw  <- fread(cmd = "grep -v '^#' ~/github/deep-dive-expression/M-multi-species/data/03-expression-matrix-correlations/Ptuh/lncRNA_counts.txt")
mrna_raw <- fread("~/github/deep-dive-expression/M-multi-species/data/03-expression-matrix-correlations/Ptuh/mRNA_counts.txt")

# ---------- 2.1  Quick sanity counts *before* you touch rownames() ----------
cat("\nlnc_raw rows :", nrow(lnc_raw),
    "| unique Geneid :", length(unique(lnc_raw$Geneid)),
    "| blanks :", sum(lnc_raw$Geneid == ""), "\n")

cat("mrna_raw rows:", nrow(mrna_raw),
    "| unique IDs   :", length(unique(mrna_raw[[1]])),
    "| blanks :", sum(mrna_raw[[1]] == ""), "\n\n")

# ---------- 2.2  Drop blank IDs (if any) and make duplicates unique ---------
lnc_raw  <- lnc_raw [Geneid != ""]
mrna_raw <- mrna_raw[mrna_raw[[1]] != ""]

lnc_ids  <- make.unique(lnc_raw$Geneid)
mrna_ids <- make.unique(mrna_raw[[1]])

# ---------- 2.3  Build matrices with row-names that now *stick* -------------
lnc_mat <- as.matrix(as.data.frame(lnc_raw[, -(1:6)], row.names = lnc_ids))
mrna_mat <- as.matrix(as.data.frame(mrna_raw[, -1], row.names = mrna_ids))

# ---------- 2.4  Harmonise column (sample) names ----------------------------
clean_lnc <- function(x) sub(".*RNA-POC-([0-9]+)-.*", "sample\\1", x)
colnames(lnc_mat)  <- clean_lnc(colnames(lnc_mat))
colnames(mrna_mat) <- sub("^kallisto_quant_", "", colnames(mrna_mat))
colnames(mrna_mat) <- sub("\\.\\d+$", "", colnames(mrna_mat))

common <- intersect(colnames(lnc_mat), colnames(mrna_mat))
length(common)  # Should now be >= 2

# ---------- 2.5  Round Kallisto counts to integers -------------------------
mrna_mat <- round(mrna_mat)

# ---------- 2.6  Assign back to the workflow objects ------------------------
lnc_counts  <- lnc_mat
mrna_counts <- mrna_mat

head(lnc_counts)
head(mrna_counts)
```

### 2.1  Match samples across matrices
```{r}
common <- intersect(colnames(lnc_counts), colnames(mrna_counts))
stopifnot(length(common) >= 2)
lnc_counts  <- lnc_counts [, common]
mrna_counts <- mrna_counts[, common]
```

## 3  Filter low-expressed genes
```{r}
min_samples <- 3
min_counts  <- 10
keep_lnc  <- rowSums(lnc_counts  >= min_counts) >= min_samples
keep_mrna <- rowSums(mrna_counts >= min_counts) >= min_samples
lnc_counts  <- lnc_counts [keep_lnc , ]
mrna_counts <- mrna_counts[keep_mrna, ]
```

## 4  Joint DESeq2 normalisation & variance-stabilising transform
```{r}
stopifnot(nrow(lnc_counts) == length(rownames(lnc_counts)))
stopifnot(nrow(mrna_counts) == length(rownames(mrna_counts)))

combined <- rbind(lnc_counts, mrna_counts)
dds <- DESeqDataSetFromMatrix(countData = combined,
                              colData = data.frame(cond = factor(rep("all", ncol(combined))),
                     row.names = colnames(combined)),
                              design    = ~ 1)

dds <- estimateSizeFactors(dds)
vst_mat <- assay(vst(dds, blind = TRUE))

lnc_vst  <- vst_mat[rownames(lnc_counts) , ]
mrna_vst <- vst_mat[rownames(mrna_counts), ]
```

## 5  Block-wise Pearson correlations
```{r}
block_size <- 2000  # tweak for RAM
nr_lnc  <- nrow(lnc_vst)
nr_mrna <- nrow(mrna_vst)
cor_mat  <- big.matrix(nrow = nr_lnc, ncol = nr_mrna, type = "double")
cor_padj <- big.matrix(nrow = nr_lnc, ncol = nr_mrna, type = "double")

for (start in seq(1, nr_lnc, by = block_size)) {
  end <- min(start + block_size - 1, nr_lnc)
  rblk <- cor(t(lnc_vst[start:end, ]), t(mrna_vst), method = "pearson")
  pblk <- corPvalueStudent(rblk, nSamples = ncol(lnc_vst))
  cor_mat [start:end, ] <- rblk
  cor_padj[start:end, ] <- p.adjust(pblk, method = "BH")
  cat(sprintf("processed %d–%d\n", start, end))
}

options(bigmemory.allow.dimnames = TRUE)
dimnames(cor_mat)  <- list(rownames(lnc_vst),  rownames(mrna_vst))
dimnames(cor_padj) <- dimnames(cor_mat)
```

## 6  Export results
```{r}
# full matrices (beware large size!)
fwrite(as.data.table(as.matrix(cor_mat),  keep.rownames = "lncRNA"), "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/Ptuh/correlations_full.tsv")
fwrite(as.data.table(as.matrix(cor_padj), keep.rownames = "lncRNA"), "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/Ptuh/correlations_padj.tsv")

# significant pairs (|r| ≥ 0.7 & padj ≤ 0.05)
sig <- which(abs(cor_mat[]) >= 0.7 & cor_padj[] <= 0.05, arr.ind = TRUE) |> as.data.frame()
colnames(sig) <- c("lnc_idx", "mrna_idx")
sig <- sig |>
  mutate(lncRNA = rownames(cor_mat)[lnc_idx],
         mRNA   = colnames(cor_mat)[mrna_idx],
         r      = cor_mat [cbind(lnc_idx, mrna_idx)],
         padj   = cor_padj[cbind(lnc_idx, mrna_idx)]) |>
  select(lncRNA, mRNA, r, padj)

fwrite(sig, "~/github/deep-dive-expression/M-multi-species/output/03-expression-matrix-correlations/Ptuh/significant_pairs.tsv")
```

## 7  Session info
```{r}
sessionInfo()
```

## 8 Visualization
```{r}
library(ggplot2)
library(reshape2)
library(tidyr)

# 1. Take top 20 by lowest padj
top_sig <- sig |> arrange(padj) |> head(20)

# 2. Create correlation matrix (wide form)
wide_df <- top_sig |> arrange(padj) |> head(20) |>
  select(lncRNA, mRNA, r) |>
  pivot_wider(names_from = mRNA, values_from = r)

heat_df <- as.data.frame(wide_df)
rownames(heat_df) <- heat_df$lncRNA
heat_df <- heat_df[, -1]

# 3. Melt into long format
heat_long <- melt(as.matrix(heat_df), varnames = c("lncRNA", "mRNA"), value.name = "correlation")

# 4. Plot
ggplot(heat_long, aes(x = mRNA, y = lncRNA, fill = correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0,
                       limits = c(-1, 1), name = "Pearson r") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.grid = element_blank()) +
  labs(title = "Top lncRNA–mRNA Correlations",
       x = "mRNA", y = "lncRNA")

```