--- title: "WGCNA analysis of ortholog expression" author: "Ariana S Huffmyer, E5 RoL Team" date: "2025" output: html_document: code_folding: hide toc: yes toc_depth: 6 toc_float: yes editor_options: chunk_output_type: console --- # Set up ```{r} library(vegan) library(mixOmics) library(ggplot2) library(tidyverse) library(DESeq2) library(factoextra) library(WGCNA) library(ComplexHeatmap) ``` ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) # The following setting is important, do not omit. options(stringsAsFactors = FALSE) #Set Strings to character ``` # Load data Read in data generated for ortholog expression ```{r} orthos<-read_csv("M-multi-species/output/14-pca-orthologs/vst_counts_matrix.csv") str(orthos) metadata<-read_csv("master-molecular-metadata.csv")%>%dplyr::select(colony, timepoint, site, species) str(metadata) ``` # WGCNA ```{r} ## ========================= ## WGCNA on ortholog matrix ## ========================= # Packages suppressPackageStartupMessages({ library(tidyverse) library(WGCNA) }) # Speed / reproducibility options(stringsAsFactors = FALSE) allowWGCNAThreads() # uses multi-core where possible ## ------------------------- ## 1) Prepare input matrices ## ------------------------- # Expect: orthos has first column 'group_id' then sample columns stopifnot("group_id" %in% names(orthos)) # Keep a clean copy orthos_df <- orthos %>% as.data.frame() # Pull sample columns (everything except group_id) sample_cols <- setdiff(names(orthos_df), "group_id") ``` ```{r} # --- Build a clean metadata with string cols and an explicit sample_id --- metadata <- metadata %>% as.data.frame() %>% # drop tibble/S4 wrappers mutate(across(everything(), as.character)) %>% mutate(sample_id = paste(colony, timepoint, sep = "-")) # Check for duplicates in sample_id (these would break ordering) dup_ids <- metadata$sample_id[duplicated(metadata$sample_id)] if (length(dup_ids) > 0) { stop("Duplicate sample_id(s) in metadata: ", paste(unique(dup_ids), collapse = ", ")) } # --- Order metadata to match the expression columns using a left_join --- # This preserves the order of sample_cols and is robust to Rle types. metadata <- tibble(sample_id = setdiff(names(orthos), "group_id")) %>% left_join(metadata, by = "sample_id") # Sanity check for missing joins missing_meta <- metadata %>% filter(is.na(colony) | is.na(timepoint)) if (nrow(missing_meta) > 0) { stop( "These expression samples are missing from metadata (no join): ", paste(missing_meta$sample_id, collapse = ", ") ) } ``` ```{r} # Rebuild the expression matrix to match this order expr_mat <- orthos %>% as.data.frame() %>% column_to_rownames("group_id") %>% .[, metadata$sample_id, drop = FALSE] %>% # force column order to match metadata t() %>% as.data.frame() identical(rownames(expr_mat), metadata$sample_id) # Sanity checks nrow(expr_mat) == nrow(metadata) identical(rownames(expr_mat), metadata$sample_id) ``` ```{r} dim(expr_mat) # Optional: remove genes with too much missingness / low variance # WGCNA will also check, but a pre-filter can help speed. # e.g., keep genes with variance > 0 across samples gene_var <- apply(expr_mat, 2, var, na.rm = TRUE) expr_mat <- expr_mat[, gene_var > 0] dim(expr_mat) #removed only 4 genes, most have variance >0 # Let WGCNA check for good samples/genes gsg <- goodSamplesGenes(expr_mat, verbose = 3) if (!gsg$allOK) { if (sum(!gsg$goodGenes) > 0) message("Removing genes: ", paste(colnames(expr_mat)[!gsg$goodGenes], collapse = ", ")) if (sum(!gsg$goodSamples) > 0) message("Removing samples: ", paste(rownames(expr_mat)[!gsg$goodSamples], collapse = ", ")) expr_mat <- expr_mat[gsg$goodSamples, gsg$goodGenes] metadata <- metadata[gsg$goodSamples, , drop = FALSE] } ``` ```{r} ## ------------------------- ## 2) Build trait matrix ## ------------------------- # We’ll correlate module eigengenes with timepoint and site (categorical). # Use one-hot encoding (model.matrix) to get numeric contrasts. # We'll drop intercept to avoid collinearity. traits_timepoint <- model.matrix(~ 0 + factor(metadata$timepoint)) colnames(traits_timepoint) <- gsub("^factor\\(metadata\\$timepoint\\)", "", colnames(traits_timepoint)) traits_site <- model.matrix(~ 0 + factor(metadata$site)) colnames(traits_site) <- gsub("^factor\\(metadata\\$site\\)", "", colnames(traits_site)) traits_species <- model.matrix(~ 0 + factor(metadata$species)) colnames(traits_species) <- gsub("^factor\\(metadata\\$species\\)", "", colnames(traits_species)) # Combine into one trait matrix trait_mat <- cbind(traits_timepoint, traits_site, traits_species) head(trait_mat) rownames(trait_mat) <- metadata$sample_id head(trait_mat) ``` Add physiology into the trait matrix ```{r} phys<-read_csv("M-multi-species/data/phys_master_timeseries.csv") head(phys) phys<-phys%>% dplyr::select(sample_id, Host_AFDW.mg.cm2, Sym_AFDW.mg.cm2, Am, AQY, Rd, Ik, Ic, calc.umol.cm2.hr, cells.mgAFDW, prot_mg.mgafdw, cre.umol.mgafdw, Ratio_AFDW.mg.cm2, Total_Chl, Total_Chl_cell)%>% mutate(sample_id=str_replace(sample_id, "_", "-")) str(trait_mat) str(phys) ``` ```{r} # 1) Make 'phys' row-indexable by sample_id and drop the id column phys_by_id <- phys %>% distinct(sample_id, .keep_all = TRUE) %>% # just in case there are duplicates column_to_rownames("sample_id") %>% as.data.frame() # 2) Reorder 'phys' rows to match trait_mat rownames (keeps NAs for any missing) phys_aligned <- phys_by_id[rownames(trait_mat), , drop = FALSE] # 3) Bind to the right of trait_mat, preserving matrix type # (columns in phys are numeric per your str(), so this stays numeric) trait_mat_plus <- cbind(trait_mat, as.matrix(phys_aligned)) # 4) (Optional) Make sure column names are unique if there’s any overlap colnames(trait_mat_plus) <- make.unique(colnames(trait_mat_plus)) # ---- quick sanity checks (optional) ---- # any trait_mat rows missing in phys? setdiff(rownames(trait_mat), rownames(phys_by_id)) # any phys rows not in trait_mat? setdiff(rownames(phys_aligned), rownames(trait_mat)) ``` ```{r} ## -------------------------------------- ## 3) Pick soft-thresholding power (beta) ## -------------------------------------- powers <- c(seq(1, 10), seq(12, 20, by = 2)) sft <- pickSoftThreshold(expr_mat, powerVector = powers, networkType = "signed", verbose = 5) # Choose the smallest power achieving scale-free topology fit ~0.8 (or your preference). # If none reach 0.8, pick the elbow / max fit. fitIdx <- which.max(sft$fitIndices[, "SFT.R.sq"] >= 0.8) if (length(fitIdx) == 0) { # fallback: best overall fit fitIdx <- which.max(sft$fitIndices[, "SFT.R.sq"]) } softPower <- sft$fitIndices[fitIdx, "Power"] message("Chosen soft-threshold power: ", softPower) ## -------------------------------------- ## Plot scale-free topology and connectivity ## -------------------------------------- par(mfrow = c(1, 2)) cex1 <- 0.9 # 1. Scale-free topology fit index as a function of power plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2], xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2", type = "n", main = paste("Scale independence")) text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2], labels = powers, cex = cex1, col = "blue") abline(h = 0.8, col = "red", lty = 2) abline(v = softPower, col = "red", lty = 3) # 2. Mean connectivity as a function of power plot(sft$fitIndices[, 1], sft$fitIndices[, 5], xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n", main = paste("Mean connectivity")) text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = powers, cex = cex1, col = "blue") abline(v = softPower, col = "red", lty = 3) par(mfrow = c(1, 1)) dev.off() ``` Soft power = 16 ```{r} ## ----------------------------------- ## 4) Build network and detect modules ## ----------------------------------- # Use blockwiseModules for convenience and speed. net <- blockwiseModules( expr_mat, power = softPower, networkType = "signed", TOMType = "signed", corType = "pearson", minModuleSize = 30, # adjust based on your goals mergeCutHeight = 0.25, # ~cor 0.75 between MEs numericLabels = TRUE, pamRespectsDendro = TRUE, saveTOMs = FALSE, verbose = 3 ) # Extract module labels and colors module_labels <- net$colors module_colors <- labels2colors(net$colors) MEs <- net$MEs # Order eigengenes by hierarchical clustering MEs <- orderMEs(MEs) ``` ```{r} ## -------------------------------------------- ## 5) Correlate module eigengenes with the traits ## -------------------------------------------- # Ensure row order matches identical(rownames(MEs), rownames(trait_mat_plus)) mod_trait_cor <- cor(MEs, trait_mat_plus, use = "p") mod_trait_pval <- corPvalueStudent(mod_trait_cor, nSamples = nrow(expr_mat)) # --- Tidy module–trait correlation table (with BH-adjusted p per trait) --- # Correlations mod_trait_cor_df <- as.data.frame(mod_trait_cor) %>% tibble::rownames_to_column(var = "module") %>% tidyr::pivot_longer(-module, names_to = "trait", values_to = "cor") # P-values mod_trait_pval_df <- as.data.frame(mod_trait_pval) %>% tibble::rownames_to_column(var = "module") %>% tidyr::pivot_longer(-module, names_to = "trait", values_to = "p") # Join + tidy mod_trait_tbl <- mod_trait_cor_df %>% dplyr::left_join(mod_trait_pval_df, by = c("module", "trait")) %>% dplyr::mutate( cor = as.numeric(cor), p = as.numeric(p) ) %>% dplyr::group_by(trait) %>% dplyr::mutate(p_adj = p.adjust(p, method = "BH")) %>% dplyr::ungroup() %>% dplyr::arrange(trait, dplyr::desc(abs(cor))) print( mod_trait_tbl %>% dplyr::group_by(trait) %>% dplyr::slice_max(order_by = abs(cor), n = 5) %>% dplyr::ungroup() ) ``` ```{r} ## ---------------------------------------------------- ## 6) Map genes (orthogroups) to modules & save outputs ## ---------------------------------------------------- # Gene (orthogroup) IDs gene_ids <- colnames(expr_mat) gene_module_df <- tibble( group_id = gene_ids, module_label = module_labels, module_color = labels2colors(module_labels) ) # Example: save gene lists per module color write_csv(gene_module_df, "M-multi-species/output/18-ortholog-wgcna/gene_module_assignments.csv") #summarize number of genes in each module module_counts <- gene_module_df %>% as.data.frame() %>% dplyr::count(module_label, name = "n_genes") %>% arrange(desc(n_genes)) print(module_counts) ``` ```{r} ## -------------------------------------- ## 8) Optional: Module membership / kME ## -------------------------------------- # Correlate each gene with its module eigengene (kME) and attach p-values. kME <- as.data.frame(cor(expr_mat, MEs, use = "p")) kME_p <- as.data.frame(corPvalueStudent(as.matrix(kME), nSamples = nrow(expr_mat))) kME_tbl <- kME %>% rownames_to_column(var = "group_id") %>% as_tibble() %>% left_join(gene_module_df, by = "group_id") # Example: list top hub-like orthogroups within a module (by |kME|) top_hubs <- kME_tbl %>% mutate(across(starts_with("ME"), abs)) %>% rowwise() %>% mutate(kME_in_own = { me_col <- paste0("ME", module_color) if (me_col %in% colnames(kME_tbl)) get(me_col) else NA_real_ }) %>% ungroup() %>% arrange(module_color, desc(kME_in_own)) %>% group_by(module_color) %>% slice_head(n = 20) %>% ungroup() print(top_hubs) ``` ```{r} ## -------------------------------------- ## 9) Heatmap with compact row dendrogram + show text only if p<0.05 ## -------------------------------------- # --- Build text and significance mask --- text_matrix <- matrix("", nrow = nrow(mod_trait_cor), ncol = ncol(mod_trait_cor), dimnames = dimnames(mod_trait_cor)) sig_mask <- mod_trait_pval < 0.05 for (i in seq_len(nrow(mod_trait_cor))) { for (j in seq_len(ncol(mod_trait_cor))) { if (isTRUE(sig_mask[i, j])) { r <- signif(mod_trait_cor[i, j], 2) p <- signif(mod_trait_pval[i, j], 1) text_matrix[i, j] <- paste0(r, "\n(", p, ")") } else { text_matrix[i, j] <- "" # hide non-significant } } } # --- Cluster rows (modules) --- row_cor <- cor(t(mod_trait_cor), use = "pairwise.complete.obs") row_d <- as.dist(1 - row_cor) row_hc <- hclust(row_d, method = "average") rord <- row_hc$order mat_re <- mod_trait_cor[rord, , drop = FALSE] text_re <- text_matrix[rord, , drop = FALSE] sig_mask_re <- sig_mask[rord, , drop = FALSE] yLabs_re <- rownames(mod_trait_cor)[rord] xLabs_re <- colnames(trait_mat_plus) # --- Compact layout: narrower dendrogram panel --- op <- par(no.readonly = TRUE) layout(matrix(c(1, 2), nrow = 1), widths = c(0.6, 5)) ## (1) Row dendrogram par(mar = c(10, 0, 3, 0)) plot(as.dendrogram(row_hc), horiz = TRUE, axes = FALSE, yaxs = "i", xaxs = "i", xlab = "", ylab = "", main = "", xlim = c(max(row_hc$height) * 1.05, 0)) ## (2) Heatmap with reduced left margin par(mar = c(10, 5, 3, 3)) WGCNA::labeledHeatmap( Matrix = mat_re, xLabels = xLabs_re, yLabels = yLabs_re, ySymbols = yLabs_re, colorLabels = FALSE, colors = WGCNA::blueWhiteRed(50), textMatrix = text_re, # already blank where p>0.05 setStdMargins = FALSE, cex.text = 0.6, zlim = c(-1, 1), main = "Module–Trait Correlations\n(cor with p-values, p<0.05 shown only)" ) # Overlay bold text for significant cells nr <- nrow(mat_re) nc <- ncol(mat_re) for (i in seq_len(nr)) { for (j in seq_len(nc)) { if (isTRUE(sig_mask_re[i, j])) { text(x = j, y = nr - i + 1, labels = text_re[i, j], cex = 0.6, font = 2) # bold } } } # reset par(op); layout(1) ``` ```{r} ## -------------------------------------- ## 10) Export per-module gene lists to CSV ## -------------------------------------- # Output locations out_base <- "M-multi-species/output/18-ortholog-wgcna" out_dir <- file.path(out_base, "module_gene_lists") dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) # Ensure we have gene -> module mapping if (!exists("gene_module_df")) { gene_ids <- colnames(expr_mat) gene_module_df <- tibble( group_id = gene_ids, module_label = module_labels, module_color = labels2colors(module_labels) ) } # Ensure we have kME table (gene-to-ME correlations) if (!exists("kME_tbl")) { kME <- as.data.frame(cor(expr_mat, MEs, use = "p")) kME_tbl <- kME %>% rownames_to_column(var = "group_id") %>% as_tibble() %>% left_join(gene_module_df, by = "group_id") } # Ensure a per-gene kME for its own module ("kME_in_own") if (!"kME_in_own" %in% names(kME_tbl)) { kME_tbl <- kME_tbl %>% mutate(across(starts_with("ME"), as.numeric)) %>% rowwise() %>% mutate(kME_in_own = { me_col <- paste0("ME", module_color) if (me_col %in% colnames(cur_data_all())) cur_data_all()[[me_col]] else NA_real_ }) %>% ungroup() } # 1) One CSV per module with its gene list ordered by hub-likeness module_levels <- sort(unique(gene_module_df$module_color)) for (col in module_levels) { df <- kME_tbl %>% filter(module_color == col) %>% arrange(desc(kME_in_own)) %>% dplyr::select(group_id, module_label, module_color, kME_in_own) safe_col <- gsub("[^A-Za-z0-9]+", "-", col) outfile <- file.path(out_dir, paste0("genes_in_", safe_col, "_module.csv")) write_csv(df, outfile) } # 2) A single combined CSV of all genes with assignments and kMEs combined <- kME_tbl %>% arrange(module_color, desc(kME_in_own)) %>% dplyr::select(group_id, module_label)%>% dplyr::rename(wgcna_module=module_label) write_csv(combined, file.path(out_base, "wgcna_ortholog_module_assignments.csv")) ``` # Plot eigengenes Plot eigengene expression across factors of interest ```{r} ## -------------------------------------- ## 11) Plot MEs with significance stars ## (stars denote p < 0.05 in ME~trait correlation) ## -------------------------------------- suppressPackageStartupMessages(library(ggplot2)) suppressPackageStartupMessages(library(broom)) # --- Prepare long-form ME table with metadata & phys ------------------------- me_long <- MEs %>% as.data.frame() %>% rownames_to_column("sample_id") %>% pivot_longer(starts_with("ME"), names_to = "ME", values_to = "eigengene") %>% mutate(module_color = sub("^ME", "", ME)) meta_min <- metadata %>% dplyr::select(sample_id, species, timepoint, site) %>% mutate( species = as.factor(species), timepoint = as.factor(timepoint), site = as.factor(site) ) phys_df <- phys_aligned %>% as.data.frame() %>% rownames_to_column("sample_id") me_annot <- me_long %>% left_join(meta_min, by = "sample_id") %>% left_join(phys_df, by = "sample_id") # Identify numeric phys columns available (for scatter panels & sig lookup) phys_numeric_cols <- names(phys_df)[names(phys_df) != "sample_id"] phys_numeric_cols <- phys_numeric_cols[sapply(phys_df[phys_numeric_cols], is.numeric)] # --- Build a tidy p-value table from mod_trait_pval for significance lookups -- pval_tbl <- as.data.frame(mod_trait_pval) %>% rownames_to_column("ME") %>% pivot_longer(-ME, names_to = "trait", values_to = "p") %>% mutate(module_color = sub("^ME", "", ME), sig = p < 0.05) # Factor-level sets (as created earlier) timepoint_cols <- colnames(traits_timepoint) site_cols <- colnames(traits_site) species_cols <- colnames(traits_species) # Significance by factor (panel-level) sig_timepoint <- pval_tbl %>% filter(trait %in% timepoint_cols) %>% group_by(module_color) %>% summarise(sig = any(sig, na.rm = TRUE), .groups = "drop") sig_site <- pval_tbl %>% filter(trait %in% site_cols) %>% group_by(module_color) %>% summarise(sig = any(sig, na.rm = TRUE), .groups = "drop") sig_species <- pval_tbl %>% filter(trait %in% species_cols) %>% group_by(module_color) %>% summarise(sig = any(sig, na.rm = TRUE), .groups = "drop") # Significance for phys metrics: specific (metric, module_color) pairs sig_phys <- pval_tbl %>% filter(trait %in% phys_numeric_cols) %>% transmute(metric = trait, module_color, sig) # --- y-positions for star placement (top-left of each panel) ----------------- panel_y_species <- me_annot %>% group_by(module_color) %>% summarise(y = max(eigengene, na.rm = TRUE), .groups = "drop") %>% left_join(sig_species, by = "module_color") %>% filter(sig == TRUE) %>% mutate(label = "★") panel_y_time <- me_annot %>% group_by(module_color) %>% summarise(y = max(eigengene, na.rm = TRUE), .groups = "drop") %>% left_join(sig_timepoint, by = "module_color") %>% filter(sig == TRUE) %>% mutate(label = "★") panel_y_site <- me_annot %>% group_by(module_color) %>% summarise(y = max(eigengene, na.rm = TRUE), .groups = "drop") %>% left_join(sig_site, by = "module_color") %>% filter(sig == TRUE) %>% mutate(label = "★") panel_y_phys <- me_annot %>% pivot_longer(cols = any_of(phys_numeric_cols), names_to = "metric", values_to = "metric_value") %>% group_by(metric, module_color) %>% summarise(y = max(eigengene, na.rm = TRUE), .groups = "drop") %>% left_join(sig_phys, by = c("metric", "module_color")) %>% filter(sig == TRUE) %>% mutate(label = "★") # A small, clean theme theme_me <- theme_bw(base_size = 11) + theme( panel.grid.minor = element_blank(), strip.background = element_rect(fill = "grey95", color = "grey85"), legend.position = "none", axis.text.x = element_text(angle=45, hjust=1) ) # -------- Categorical factors: species / timepoint / site -------------------- # Species p_species <- ggplot(me_annot, aes(x = species, y = eigengene, fill = species)) + geom_boxplot(outlier.shape = NA, alpha = 0.7) + geom_jitter(width = 0.15, size = 0.8, alpha = 0.6) + facet_wrap(~ module_color, scales = "free_y") + geom_text( data = panel_y_species, aes(x = -Inf, y = y, label = label), inherit.aes = FALSE, hjust = -0.2, vjust = 1.1, size = 6, color="red" ) + labs(title = "Module eigengenes by species (★ p<0.05)", x = "Species", y = "Module eigengene") + theme_me print(p_species) # Save plot out_plot_dir <- file.path(out_base, "module_eigengene_factor_panels") dir.create(out_plot_dir, recursive = TRUE, showWarnings = FALSE) ggsave( filename = file.path(out_plot_dir, "MEs_by_species.png"), plot = p_species, width = 10, height = 6, dpi = 300 ) # Timepoint p_time <- ggplot(me_annot, aes(x = timepoint, y = eigengene, fill = timepoint)) + geom_boxplot(outlier.shape = NA, alpha = 0.7) + geom_jitter(width = 0.15, size = 0.8, alpha = 0.6) + facet_wrap(~ module_color, scales = "free_y") + geom_text( data = panel_y_time, aes(x = -Inf, y = y, label = label), inherit.aes = FALSE, hjust = -0.2, vjust = 1.1, size = 6, color="red" ) + labs(title = "Module eigengenes by timepoint (★ p<0.05)", x = "Timepoint", y = "Module eigengene") + theme_me print(p_time) ggsave( filename = file.path(out_plot_dir, "MEs_by_time.png"), plot = p_time, width = 10, height = 6, dpi = 300 ) # Site p_site <- ggplot(me_annot, aes(x = site, y = eigengene, fill = site)) + geom_boxplot(outlier.shape = NA, alpha = 0.7) + geom_jitter(width = 0.15, size = 0.8, alpha = 0.6) + facet_wrap(~ module_color, scales = "free_y") + geom_text( data = panel_y_site, aes(x = -Inf, y = y, label = label), inherit.aes = FALSE, hjust = -0.2, vjust = 1.1, size = 6, color="red" ) + labs(title = "Module eigengenes by site (★ p<0.05)", x = "Site", y = "Module eigengene") + theme_me print(p_site) ggsave( filename = file.path(out_plot_dir, "MEs_by_site.png"), plot = p_site, width = 10, height = 6, dpi = 300 ) # -------------------- Physiological metrics (continuous) --------------------- # One plot per metric, faceted by module_color, with significance stars # Long-form data me_phys_long <- me_annot %>% pivot_longer( cols = any_of(phys_numeric_cols), names_to = "metric", values_to = "metric_value" ) # Significance table (TRUE if ME~metric correlation p<0.05) # Uses previously built pval_tbl and phys_numeric_cols sig_phys <- pval_tbl %>% filter(trait %in% phys_numeric_cols) %>% transmute(metric = trait, module_color, sig) # Helper: build one plot for a given metric plot_metric_panel <- function(metric_name) { d <- me_phys_long %>% filter(metric == metric_name) # Per-(metric,module) star y-position (top + padding) and x at left edge star_df <- d %>% group_by(module_color) %>% summarise( y_star = max(eigengene, na.rm = TRUE) + 0.05 * (diff(range(eigengene, na.rm = TRUE)) + 1e-8), x_star = suppressWarnings(min(metric_value, na.rm = TRUE)), .groups = "drop" ) %>% left_join(sig_phys %>% filter(metric == metric_name), by = "module_color") %>% filter(!is.na(sig) & sig) %>% mutate(label = "★", x_star = ifelse(is.finite(x_star), x_star, 0)) ggplot(d, aes(x = metric_value, y = eigengene)) + geom_point(alpha = 0.6, size = 1) + geom_smooth(method = "lm", se = TRUE) + facet_wrap(~ module_color, scales = "free_y") + geom_text( data = star_df, aes(x = x_star, y = y_star, label = label), inherit.aes = FALSE, size = 5, vjust = 1, color="red" ) + labs( title = paste0("Module eigengenes vs. ", metric_name, " (★ p<0.05)"), x = metric_name, y = "Module eigengene" ) + coord_cartesian(clip = "off") + theme_me } # Build all metric plots as a named list and print each metric_plots <- setNames( lapply(phys_numeric_cols, plot_metric_panel), phys_numeric_cols ) # Print each plot in the document (one panel per metric) invisible(lapply(metric_plots, print)) # ----------------------------------------- # Save each panel as a PNG in out_base dir # ----------------------------------------- out_plot_dir <- file.path(out_base, "module_eigengene_metric_panels") dir.create(out_plot_dir, recursive = TRUE, showWarnings = FALSE) for (metric_name in names(metric_plots)) { outfile <- file.path(out_plot_dir, paste0("MEs_vs_", metric_name, ".png")) message("Saving: ", outfile) ggsave( filename = outfile, plot = metric_plots[[metric_name]], width = 8, height = 6, dpi = 300 ) } ```