Hypothesis Overview

Main Hypothesis: “Plastic vs buffered calcification is encoded by epigenetic lability of ion-transport genes”

Species differ in how epigenetically labile key calcification genes are:

  • Acropora pulchra: High epigenetic responsiveness (dynamic DNA methylation) at calcification genes, enabling rapid changes but causing instability
  • Porites evermanni: Epigenetic buffering (stable methylation, reduced regulatory turnover), enabling sustained calcification under stress

“Frontloading” vs “Reacting” Hypothesis

  • Porites evermanni exhibits constitutive high gene body methylation (GBM) in key calcification genes (Ca-ATPase, Carbonic Anhydrase), leading to stable, high-level transcription (“Frontloading”)
  • Acropora pulchra relies on promoter methylation changes to dynamically regulate these genes

Expected Observations

  1. Under stable conditions: Porites shows lower variance in methylation and expression compared to Acropora
  2. Under heat stress (TP2): Acropora shows massive epigenetic remodeling (DMRs) associated with calcification collapse; Porites methylation profiles remain rigid

Target Gene Systems

  • Ca²⁺ transport: PMCAs, Ca²⁺ channels
  • H⁺ removal / pH regulation: V-type ATPases, SLC transporters
  • Carbon supply: carbonic anhydrases (CA)

Setup and Libraries

library(tidyverse)
library(corrplot)
library(pheatmap)
library(RColorBrewer)
library(viridis)
library(ggpubr)
library(cowplot)
library(broom)

# Set theme
theme_set(theme_minimal(base_size = 12))

# Species lookup
species_lookup <- c(
  apul = "Acropora pulchra",
  peve = "Porites evermanni",
  ptua = "Pocillopora tuahiniensis"
)

# Color palette for species
species_colors <- c(
  "Acropora pulchra" = "#E64B35",
  "Porites evermanni" = "#4DBBD5",
  "Pocillopora tuahiniensis" = "#00A087"
)

# Timepoint colors
tp_colors <- c("TP1" = "#2166AC", "TP2" = "#D6604D", "TP3" = "#4393C3", "TP4" = "#92C5DE")
output_dir <- "../output/51-HypA-epigenetic-lability"
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

Load Data

Expression Matrices for Calcification Genes

# Load biomineralization gene expression counts
apul_biomin <- read_csv("https://raw.githubusercontent.com/urol-e5/timeseries-molecular-calcification/refs/heads/main/M-multi-species/output/33-biomin-pathway-counts/apul_biomin_counts.csv",
                        show_col_types = FALSE)

peve_biomin <- read_csv("https://raw.githubusercontent.com/urol-e5/timeseries-molecular-calcification/refs/heads/main/M-multi-species/output/33-biomin-pathway-counts/peve_biomin_counts.csv",
                        show_col_types = FALSE)

ptua_biomin <- read_csv("https://raw.githubusercontent.com/urol-e5/timeseries-molecular-calcification/refs/heads/main/M-multi-species/output/33-biomin-pathway-counts/ptua_biomin_counts.csv",
                        show_col_types = FALSE)

Gene Body Methylation Data

# Acropora pulchra methylation
apul_meth <- read_tsv("https://raw.githubusercontent.com/urol-e5/timeseries-molecular-calcification/refs/heads/main/D-Apul/output/40-Apul-Gene-Methylation/Apul-gene-methylation_75pct.tsv",
                      show_col_types = FALSE)

# Porites evermanni methylation
peve_meth <- read_tsv("https://raw.githubusercontent.com/urol-e5/timeseries-molecular-calcification/refs/heads/main/E-Peve/output/15-Peve-Gene-Methylation/Peve-gene-methylation_75pct.tsv",
                      show_col_types = FALSE)

# Pocillopora tuahiniensis methylation
ptua_meth <- read_tsv("https://raw.githubusercontent.com/urol-e5/timeseries-molecular-calcification/refs/heads/main/F-Ptua/output/09-Ptua-Gene-Methylation/Ptua-gene-methylation_75pct.tsv",
                      show_col_types = FALSE)

Annotation File

ortholog_annot <- read_csv("../output/12-ortho-annot/ortholog_groups_annotated.csv",
                           show_col_types = FALSE)

Exon-Level Expression Data

# Exon summary by ortholog for expression variability analysis
apul_exon_summary <- read_csv("../output/40-exon-count-matrix/apul-exon_summary_by_ortholog.csv",
                              show_col_types = FALSE)

peve_exon_summary <- read_csv("../output/40-exon-count-matrix/peve-exon_summary_by_ortholog.csv",
                              show_col_types = FALSE)

ptua_exon_summary <- read_csv("../output/40-exon-count-matrix/ptua-exon_summary_by_ortholog.csv",
                              show_col_types = FALSE)

Data Wrangling

Reshape Expression Data

# Function to reshape expression data to long format
reshape_expression <- function(df, species_code) {
  df %>%
    pivot_longer(
      cols = -c(group_id, gene_id),
      names_to = "sample_id",
      values_to = "count"
    ) %>%
    mutate(
      species_code = species_code,
      colony_id = str_remove(sample_id, "-TP\\d+$"),
      timepoint = str_extract(sample_id, "TP\\d+"),
      timepoint_num = parse_number(timepoint),
      log_count = log2(count + 1)
    )
}

# Reshape all species
expr_long <- bind_rows(
  reshape_expression(apul_biomin, "apul"),
  reshape_expression(peve_biomin, "peve"),
  reshape_expression(ptua_biomin, "ptua")
) %>%
  mutate(species = species_lookup[species_code])

head(expr_long)
## # A tibble: 6 × 10
##   group_id gene_id    sample_id   count species_code colony_id timepoint
##   <chr>    <chr>      <chr>       <dbl> <chr>        <chr>     <chr>    
## 1 OG_13910 FUN_002435 ACR-139-TP1 16875 apul         ACR-139   TP1      
## 2 OG_13910 FUN_002435 ACR-139-TP2 28318 apul         ACR-139   TP2      
## 3 OG_13910 FUN_002435 ACR-139-TP3 32788 apul         ACR-139   TP3      
## 4 OG_13910 FUN_002435 ACR-139-TP4 51133 apul         ACR-139   TP4      
## 5 OG_13910 FUN_002435 ACR-145-TP1 34014 apul         ACR-145   TP1      
## 6 OG_13910 FUN_002435 ACR-145-TP2 15116 apul         ACR-145   TP2      
## # ℹ 3 more variables: timepoint_num <dbl>, log_count <dbl>, species <chr>

Reshape Methylation Data

# Function to reshape methylation data
reshape_methylation <- function(df, species_code) {
  df %>%
    pivot_longer(
      cols = -gene_id,
      names_to = "sample_id",
      values_to = "methylation"
    ) %>%
    mutate(
      species_code = species_code,
      colony_id = str_remove(sample_id, "-TP\\d+$"),
      timepoint = str_extract(sample_id, "TP\\d+"),
      timepoint_num = parse_number(timepoint)
    )
}

# Reshape all species methylation
meth_long <- bind_rows(
  reshape_methylation(apul_meth, "apul") %>% mutate(gene_id = str_remove(gene_id, "^gene-")),
  reshape_methylation(peve_meth, "peve") %>% mutate(gene_id = str_remove(gene_id, "^gene-")),
  reshape_methylation(ptua_meth, "ptua") %>% mutate(gene_id = str_remove(gene_id, "^gene-"))
) %>%
  mutate(species = species_lookup[species_code])

head(meth_long)
## # A tibble: 6 × 8
##   gene_id   sample_id methylation species_code colony_id timepoint timepoint_num
##   <chr>     <chr>           <dbl> <chr>        <chr>     <chr>             <dbl>
## 1 FUN_0001… ACR-139-…       2.16  apul         ACR-139   TP1                   1
## 2 FUN_0001… ACR-139-…       1.23  apul         ACR-139   TP2                   2
## 3 FUN_0001… ACR-139-…       0.491 apul         ACR-139   TP3                   3
## 4 FUN_0001… ACR-139-…       1.34  apul         ACR-139   TP4                   4
## 5 FUN_0001… ACR-145-…       0.649 apul         ACR-145   TP1                   1
## 6 FUN_0001… ACR-145-…       0.589 apul         ACR-145   TP2                   2
## # ℹ 1 more variable: species <chr>

Identify Biomineralization Genes

# Get unique biomineralization gene IDs for each species
biomin_genes <- list(
  apul = unique(apul_biomin$gene_id),
  peve = unique(peve_biomin$gene_id),
  ptua = unique(ptua_biomin$gene_id)
)

biomin_orthogroups <- unique(c(apul_biomin$group_id, peve_biomin$group_id, ptua_biomin$group_id))

cat("Number of biomineralization genes per species:\n")
## Number of biomineralization genes per species:
sapply(biomin_genes, length)
## apul peve ptua 
##  505  565  483
cat("\nTotal unique orthogroups:", length(biomin_orthogroups), "\n")
## 
## Total unique orthogroups: 657

Filter Methylation Data for Biomin Genes

# Filter methylation data to biomineralization genes
biomin_meth <- meth_long %>%
  filter(
    (species_code == "apul" & gene_id %in% biomin_genes$apul) |
    (species_code == "peve" & gene_id %in% biomin_genes$peve) |
    (species_code == "ptua" & gene_id %in% biomin_genes$ptua)
  )

cat("Biomin genes with methylation data:\n")
## Biomin genes with methylation data:
biomin_meth %>%
  group_by(species_code) %>%
  summarise(n_genes = n_distinct(gene_id)) %>%
  print()
## # A tibble: 3 × 2
##   species_code n_genes
##   <chr>          <int>
## 1 apul             485
## 2 peve             483
## 3 ptua             459

Analysis 1: Methylation Variability Across Species

Calculate Methylation Variance per Gene

# Calculate methylation variance for each gene within and across timepoints
meth_variance <- biomin_meth %>%
  filter(!is.na(methylation)) %>%
  group_by(species_code, species, gene_id) %>%
  summarise(
    mean_meth = mean(methylation, na.rm = TRUE),
    sd_meth = sd(methylation, na.rm = TRUE),
    cv_meth = sd_meth / mean_meth,  # Coefficient of variation
    n_samples = n(),
    .groups = "drop"
  ) %>%
  filter(!is.na(cv_meth) & is.finite(cv_meth))

# Summary by species
meth_variance_summary <- meth_variance %>%
  group_by(species_code, species) %>%
  summarise(
    mean_cv = mean(cv_meth, na.rm = TRUE),
    median_cv = median(cv_meth, na.rm = TRUE),
    sd_cv = sd(cv_meth, na.rm = TRUE),
    n_genes = n(),
    .groups = "drop"
  )

print(meth_variance_summary)
## # A tibble: 3 × 6
##   species_code species                  mean_cv median_cv sd_cv n_genes
##   <chr>        <chr>                      <dbl>     <dbl> <dbl>   <int>
## 1 apul         Acropora pulchra           0.598     0.494 0.406     485
## 2 peve         Porites evermanni          0.617     0.549 0.350     483
## 3 ptua         Pocillopora tuahiniensis   0.439     0.385 0.315     459

Visualization: Methylation Variability Distribution

# Boxplot of methylation CV by species
p1 <- ggplot(meth_variance, aes(x = species, y = cv_meth, fill = species)) +
  geom_boxplot(alpha = 0.7, outlier.alpha = 0.3) +
  geom_jitter(width = 0.2, alpha = 0.2, size = 0.5) +
  scale_fill_manual(values = species_colors) +
  labs(
    title = "Methylation Variability (CV) Across Biomineralization Genes",
    subtitle = "Hypothesis: Acropora should show higher CV than Porites",
    x = "Species",
    y = "Coefficient of Variation (CV) in Methylation"
  ) +
  theme(legend.position = "none",
        axis.text.x = element_text(face = "italic"))

p1

ggsave(file.path(output_dir, "meth_cv_boxplot.png"), p1, width = 10, height = 6, dpi = 300)

Statistical Test: Compare Methylation Variance

# Kruskal-Wallis test for methylation CV across species
kw_meth <- kruskal.test(cv_meth ~ species_code, data = meth_variance)
print(kw_meth)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  cv_meth by species_code
## Kruskal-Wallis chi-squared = 92.138, df = 2, p-value < 2.2e-16
# Pairwise Wilcoxon tests
pairwise_meth <- pairwise.wilcox.test(meth_variance$cv_meth, meth_variance$species_code,
                                       p.adjust.method = "bonferroni")
print(pairwise_meth)
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  meth_variance$cv_meth and meth_variance$species_code 
## 
##      apul    peve   
## peve 0.21    -      
## ptua 5.7e-13 < 2e-16
## 
## P value adjustment method: bonferroni
# Save results
meth_test_results <- list(
  kruskal_wallis = kw_meth,
  pairwise_wilcox = pairwise_meth
)

Analysis 2: Mean Gene Body Methylation Levels

Compare Baseline GBM Levels

# Calculate mean methylation level per gene
mean_gbm <- biomin_meth %>%
  filter(!is.na(methylation)) %>%
  group_by(species_code, species, gene_id) %>%
  summarise(
    mean_meth = mean(methylation, na.rm = TRUE),
    .groups = "drop"
  )

# Summary by species
gbm_summary <- mean_gbm %>%
  group_by(species_code, species) %>%
  summarise(
    grand_mean = mean(mean_meth, na.rm = TRUE),
    median_meth = median(mean_meth, na.rm = TRUE),
    sd_meth = sd(mean_meth, na.rm = TRUE),
    n_genes = n(),
    .groups = "drop"
  )

print(gbm_summary)
## # A tibble: 3 × 6
##   species_code species                  grand_mean median_meth sd_meth n_genes
##   <chr>        <chr>                         <dbl>       <dbl>   <dbl>   <int>
## 1 apul         Acropora pulchra               7.05       0.980   13.6      485
## 2 peve         Porites evermanni              6.71       0.869   14.3      483
## 3 ptua         Pocillopora tuahiniensis       3.74       0.527    7.96     459

Visualization: Mean GBM Distribution

# Density plot of mean methylation by species
p2 <- ggplot(mean_gbm, aes(x = mean_meth, fill = species, color = species)) +
  geom_density(alpha = 0.4) +
  scale_fill_manual(values = species_colors) +
  scale_color_manual(values = species_colors) +
  coord_cartesian(ylim = c(0, .025)) +
  labs(
    title = "Distribution of Mean Gene Body Methylation in Biomineralization Genes",
    subtitle = "Hypothesis: Porites shows higher baseline GBM (Frontloading)",
    x = "Mean Methylation (%)",
    y = "Density"
  ) +
  theme(legend.position = "bottom",
        legend.text = element_text(face = "italic"))

p2

ggsave(file.path(output_dir, "gbm_density.png"), p2, width = 10, height = 6, dpi = 300)

Statistical Test: Compare Mean GBM

# Kruskal-Wallis test for mean GBM across species
kw_gbm <- kruskal.test(mean_meth ~ species_code, data = mean_gbm)
print(kw_gbm)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  mean_meth by species_code
## Kruskal-Wallis chi-squared = 83.382, df = 2, p-value < 2.2e-16
# Pairwise Wilcoxon tests
pairwise_gbm <- pairwise.wilcox.test(mean_gbm$mean_meth, mean_gbm$species_code,
                                      p.adjust.method = "bonferroni")
print(pairwise_gbm)
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  mean_gbm$mean_meth and mean_gbm$species_code 
## 
##      apul    peve   
## peve 1       -      
## ptua 3.2e-13 < 2e-16
## 
## P value adjustment method: bonferroni

Analysis 3: Expression Variability Across Timepoints

Calculate Expression Variance

# Calculate expression variance per gene
expr_variance <- expr_long %>%
  group_by(species_code, species, group_id, gene_id) %>%
  summarise(
    mean_count = mean(count, na.rm = TRUE),
    sd_count = sd(count, na.rm = TRUE),
    cv_expr = sd_count / mean_count,
    mean_log = mean(log_count, na.rm = TRUE),
    sd_log = sd(log_count, na.rm = TRUE),
    n_samples = n(),
    .groups = "drop"
  ) %>%
  filter(!is.na(cv_expr) & is.finite(cv_expr) & mean_count > 10)  # Filter low-expressed genes

# Summary by species
expr_var_summary <- expr_variance %>%
  group_by(species_code, species) %>%
  summarise(
    mean_cv = mean(cv_expr, na.rm = TRUE),
    median_cv = median(cv_expr, na.rm = TRUE),
    sd_cv = sd(cv_expr, na.rm = TRUE),
    n_genes = n(),
    .groups = "drop"
  )

print(expr_var_summary)
## # A tibble: 3 × 6
##   species_code species                  mean_cv median_cv sd_cv n_genes
##   <chr>        <chr>                      <dbl>     <dbl> <dbl>   <int>
## 1 apul         Acropora pulchra           0.871     0.777 0.409     460
## 2 peve         Porites evermanni          0.918     0.824 0.441     447
## 3 ptua         Pocillopora tuahiniensis   0.803     0.729 0.382     435

Visualization: Expression Variability

p3 <- ggplot(expr_variance, aes(x = species, y = cv_expr, fill = species)) +
  geom_boxplot(alpha = 0.7, outlier.alpha = 0.3) +
  geom_jitter(width = 0.2, alpha = 0.2, size = 0.5) +
  scale_fill_manual(values = species_colors) +
  scale_y_continuous(limits = c(0, quantile(expr_variance$cv_expr, 0.95))) +
  labs(
    title = "Expression Variability (CV) in Biomineralization Genes",
    subtitle = "Hypothesis: Acropora shows higher transcriptional variance",
    x = "Species",
    y = "Coefficient of Variation (CV) in Expression"
  ) +
  theme(legend.position = "none",
        axis.text.x = element_text(face = "italic"))

p3

ggsave(file.path(output_dir, "expr_cv_boxplot.png"), p3, width = 10, height = 6, dpi = 300)

Statistical Test: Compare Expression Variance

# Kruskal-Wallis test
kw_expr <- kruskal.test(cv_expr ~ species_code, data = expr_variance)
print(kw_expr)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  cv_expr by species_code
## Kruskal-Wallis chi-squared = 22.479, df = 2, p-value = 1.315e-05
# Pairwise tests
pairwise_expr <- pairwise.wilcox.test(expr_variance$cv_expr, expr_variance$species_code,
                                       p.adjust.method = "bonferroni")
print(pairwise_expr)
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  expr_variance$cv_expr and expr_variance$species_code 
## 
##      apul peve   
## peve 0.15 -      
## ptua 0.02 5.6e-06
## 
## P value adjustment method: bonferroni

Analysis 4: Temporal Dynamics - TP2 (Heat Stress) Response

Calculate Timepoint-Specific Changes

# Calculate expression by timepoint
expr_by_tp <- expr_long %>%
  group_by(species_code, species, group_id, gene_id, timepoint, timepoint_num) %>%
  summarise(
    mean_count = mean(count, na.rm = TRUE),
    mean_log = mean(log_count, na.rm = TRUE),
    .groups = "drop"
  )

# Calculate fold change relative to TP1
expr_fc <- expr_by_tp %>%
  group_by(species_code, species, group_id, gene_id) %>%
  mutate(
    tp1_mean = mean_log[timepoint == "TP1"],
    fc_from_tp1 = mean_log - tp1_mean
  ) %>%
  ungroup()

Methylation by Timepoint

# Calculate methylation by timepoint
meth_by_tp <- biomin_meth %>%
  filter(!is.na(methylation)) %>%
  group_by(species_code, species, gene_id, timepoint, timepoint_num) %>%
  summarise(
    mean_meth = mean(methylation, na.rm = TRUE),
    sd_meth = sd(methylation, na.rm = TRUE),
    .groups = "drop"
  )

# Calculate methylation change from TP1
meth_delta <- meth_by_tp %>%
  group_by(species_code, species, gene_id) %>%
  mutate(
    tp1_meth = mean_meth[timepoint == "TP1"],
    delta_meth = mean_meth - tp1_meth
  ) %>%
  ungroup() %>%
  filter(!is.na(delta_meth))

Visualization: Methylation Changes Across Timepoints

# Boxplot of methylation change from TP1
p4 <- meth_delta %>%
  filter(timepoint != "TP1") %>%
  ggplot(aes(x = timepoint, y = delta_meth, fill = species)) +
  geom_boxplot(alpha = 0.7, position = position_dodge(width = 0.8)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  scale_fill_manual(values = species_colors) +
  facet_wrap(~species, ncol = 3) +
  labs(
    title = "Methylation Changes Relative to TP1 (Baseline)",
    subtitle = "TP2 = Heat Stress | Hypothesis: Acropora shows larger remodeling at TP2",
    x = "Timepoint",
    y = "Δ Methylation (from TP1)"
  ) +
  theme(legend.position = "none",
        strip.text = element_text(face = "italic"))

p4

ggsave(file.path(output_dir, "meth_delta_tp.png"), p4, width = 12, height = 8, dpi = 300)

Quantify Remodeling Magnitude

# Calculate absolute methylation change magnitude at each timepoint
remodeling_magnitude <- meth_delta %>%
  filter(timepoint != "TP1") %>%
  group_by(species_code, species, timepoint) %>%
  summarise(
    mean_abs_delta = mean(abs(delta_meth), na.rm = TRUE),
    median_abs_delta = median(abs(delta_meth), na.rm = TRUE),
    n_genes = n(),
    .groups = "drop"
  )

print(remodeling_magnitude)
## # A tibble: 9 × 6
##   species_code species         timepoint mean_abs_delta median_abs_delta n_genes
##   <chr>        <chr>           <chr>              <dbl>            <dbl>   <int>
## 1 apul         Acropora pulch… TP2                1.01            0.207      485
## 2 apul         Acropora pulch… TP3                0.427           0.164      485
## 3 apul         Acropora pulch… TP4                0.426           0.151      485
## 4 peve         Porites everma… TP2                0.821           0.208      483
## 5 peve         Porites everma… TP3                0.671           0.178      483
## 6 peve         Porites everma… TP4                0.617           0.191      483
## 7 ptua         Pocillopora tu… TP2                0.200           0.0933     459
## 8 ptua         Pocillopora tu… TP3                0.257           0.126      459
## 9 ptua         Pocillopora tu… TP4                0.242           0.0994     459
# Visualization
p5 <- ggplot(remodeling_magnitude, aes(x = timepoint, y = mean_abs_delta, 
                                       fill = species, group = species)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8), alpha = 0.8) +
  scale_fill_manual(values = species_colors) +
  labs(
    title = "Magnitude of Methylation Remodeling by Timepoint",
    subtitle = "Higher values = more epigenetic remodeling",
    x = "Timepoint",
    y = "Mean |Δ Methylation|"
  ) +
  theme(legend.text = element_text(face = "italic"))

p5

ggsave(file.path(output_dir, "remodeling_magnitude.png"), p5, width = 10, height = 6, dpi = 300)

Analysis 5: Expression-Methylation Correlation

Merge Expression and Methylation Data

# Create gene mapping between expression and methylation
# Expression uses gene_id from biomin counts
# Methylation uses gene_id from methylation files

# Prepare expression summary by gene
expr_summary <- expr_long %>%
  group_by(species_code, gene_id) %>%
  summarise(
    mean_expr = mean(log_count, na.rm = TRUE),
    cv_expr = sd(log_count, na.rm = TRUE) / mean(log_count, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(!is.na(cv_expr) & is.finite(cv_expr))

# Merge with methylation variance
expr_meth_combined <- meth_variance %>%
  inner_join(expr_summary, by = c("species_code", "gene_id"))

cat("Genes with both expression and methylation data:\n")
## Genes with both expression and methylation data:
expr_meth_combined %>%
  group_by(species_code) %>%
  summarise(n_genes = n()) %>%
  print()
## # A tibble: 3 × 2
##   species_code n_genes
##   <chr>          <int>
## 1 apul             484
## 2 peve             479
## 3 ptua             459

Correlation Analysis

# Calculate correlation between methylation and expression CV
correlations <- expr_meth_combined %>%
  group_by(species_code, species) %>%
  summarise(
    cor_mean_meth_expr = cor(mean_meth, mean_expr, use = "complete.obs"),
    cor_cv_meth_cv_expr = cor(cv_meth, cv_expr, use = "complete.obs", method = "spearman"),
    n = n(),
    .groups = "drop"
  )

print(correlations)
## # A tibble: 3 × 5
##   species_code species              cor_mean_meth_expr cor_cv_meth_cv_expr     n
##   <chr>        <chr>                             <dbl>               <dbl> <int>
## 1 apul         Acropora pulchra                  0.307               0.420   484
## 2 peve         Porites evermanni                 0.250               0.475   479
## 3 ptua         Pocillopora tuahini…              0.226               0.420   459

Visualization: Expression vs Methylation

# Scatter plot: Mean methylation vs Mean expression
p6 <- ggplot(expr_meth_combined, aes(x = mean_meth, y = mean_expr, color = species)) +
  geom_point(alpha = 0.5, size = 2) +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  scale_color_manual(values = species_colors) +
  facet_wrap(~species, scales = "free") +
  labs(
    title = "Gene Body Methylation vs Expression Level",
    subtitle = "Biomineralization genes",
    x = "Mean Methylation (%)",
    y = "Mean Expression (log2)"
  ) +
  theme(legend.position = "none",
        strip.text = element_text(face = "italic"))

p6

ggsave(file.path(output_dir, "expr_meth_scatter.png"), p6, width = 12, height = 8, dpi = 300)

Variability Correlation

# Scatter plot: CV methylation vs CV expression
p7 <- ggplot(expr_meth_combined, aes(x = cv_meth, y = cv_expr, color = species)) +
  geom_point(alpha = 0.5, size = 2) +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  scale_color_manual(values = species_colors) +
  scale_x_continuous(limits = c(0, quantile(expr_meth_combined$cv_meth, 0.95, na.rm = TRUE))) +
  scale_y_continuous(limits = c(0, quantile(expr_meth_combined$cv_expr, 0.95, na.rm = TRUE))) +
  facet_wrap(~species, scales = "free") +
  labs(
    title = "Methylation Variability vs Expression Variability",
    subtitle = "Higher methylation CV may indicate epigenetic lability",
    x = "CV Methylation",
    y = "CV Expression"
  ) +
  theme(legend.position = "none",
        strip.text = element_text(face = "italic"))

p7

ggsave(file.path(output_dir, "cv_meth_expr_scatter.png"), p7, width = 12, height = 8, dpi = 300)

Analysis 6: Target Gene System Analysis

Annotate Target Gene Systems

# Define target gene keywords
target_keywords <- list(
  calcium_transport = c("PMCA", "plasma membrane calcium", "calcium channel", 
                        "calcium-transporting", "Ca2+-ATPase", "SERCA", "calcium pump"),
  ph_regulation = c("V-type ATPase", "vacuolar ATPase", "SLC", "solute carrier", 
                    "sodium/hydrogen", "Na+/H+", "NHE", "proton pump"),
  carbon_supply = c("carbonic anhydrase", "CA", "bicarbonate")
)

# Search for target genes in annotation
find_target_genes <- function(annotations, keywords) {
  pattern <- paste(keywords, collapse = "|")
  annotations %>%
    filter(str_detect(protein_name, regex(pattern, ignore_case = TRUE)) |
           str_detect(go_mf, regex(pattern, ignore_case = TRUE)) |
           str_detect(go_bp, regex(pattern, ignore_case = TRUE)))
}

# Filter annotation to biomineralization orthogroups
biomin_annot <- ortholog_annot %>%
  filter(group_id %in% biomin_orthogroups)

# Find target genes
target_genes <- list(
  calcium = find_target_genes(biomin_annot, target_keywords$calcium_transport),
  ph = find_target_genes(biomin_annot, target_keywords$ph_regulation),
  carbon = find_target_genes(biomin_annot, target_keywords$carbon_supply)
)

cat("Target gene counts:\n")
## Target gene counts:
cat("  Calcium transport:", nrow(target_genes$calcium), "\n")
##   Calcium transport: 9
cat("  pH regulation:", nrow(target_genes$ph), "\n")
##   pH regulation: 8
cat("  Carbon supply:", nrow(target_genes$carbon), "\n")
##   Carbon supply: 155

Focused Analysis on Target Genes

# Combine target gene orthogroups
target_ogs <- unique(c(
  target_genes$calcium$group_id,
  target_genes$ph$group_id,
  target_genes$carbon$group_id
))

cat("Total target orthogroups:", length(target_ogs), "\n")
## Total target orthogroups: 154
# Filter variance data to target genes
target_meth_var <- meth_variance %>%
  inner_join(
    bind_rows(
      apul_biomin %>% select(group_id, gene_id),
      peve_biomin %>% select(group_id, gene_id),
      ptua_biomin %>% select(group_id, gene_id)
    ) %>% filter(group_id %in% target_ogs),
    by = "gene_id"
  )

target_expr_var <- expr_variance %>%
  filter(group_id %in% target_ogs)

Visualization: Target Gene Variability

if(nrow(target_meth_var) > 0) {
  # Methylation CV for target genes
  p8a <- ggplot(target_meth_var, aes(x = species, y = cv_meth, fill = species)) +
    geom_boxplot(alpha = 0.7) +
    geom_jitter(width = 0.2, alpha = 0.5) +
    scale_fill_manual(values = species_colors) +
    labs(
      title = "Methylation Variability in Target Calcification Genes",
      subtitle = "Ca-ATPases, V-ATPases, Carbonic Anhydrases",
      x = "Species",
      y = "CV Methylation"
    ) +
    theme(legend.position = "none",
          axis.text.x = element_text(face = "italic"))
  
  print(p8a)
  ggsave(file.path(output_dir, "target_genes_meth_cv.png"), p8a, width = 10, height = 6, dpi = 300)
}

if(nrow(target_expr_var) > 0) {
  # Expression CV for target genes
  p8b <- ggplot(target_expr_var, aes(x = species, y = cv_expr, fill = species)) +
    geom_boxplot(alpha = 0.7) +
    geom_jitter(width = 0.2, alpha = 0.5) +
    scale_fill_manual(values = species_colors) +
    labs(
      title = "Expression Variability in Target Calcification Genes",
      x = "Species",
      y = "CV Expression"
    ) +
    theme(legend.position = "none",
          axis.text.x = element_text(face = "italic"))
  
  print(p8b)
  ggsave(file.path(output_dir, "target_genes_expr_cv.png"), p8b, width = 10, height = 6, dpi = 300)
}


Analysis 7: Exon-Level Expression Variability (Alternative Splicing Proxy)

Load and Analyze Exon Data

# Combine exon summaries
exon_combined <- bind_rows(
  apul_exon_summary %>% mutate(species_code = "apul"),
  peve_exon_summary %>% mutate(species_code = "peve"),
  ptua_exon_summary %>% mutate(species_code = "ptua")
) %>%
  mutate(species = species_lookup[species_code])

# Filter to biomineralization genes
biomin_exon <- exon_combined %>%
  filter(group_id %in% biomin_orthogroups | gene_id %in% unlist(biomin_genes))

# Summary statistics
exon_var_summary <- biomin_exon %>%
  group_by(species_code, species) %>%
  summarise(
    mean_cv = mean(cv_expr, na.rm = TRUE),
    median_cv = median(cv_expr, na.rm = TRUE),
    n_exons = n(),
    .groups = "drop"
  )

print(exon_var_summary)
## # A tibble: 3 × 5
##   species_code species                  mean_cv median_cv n_exons
##   <chr>        <chr>                      <dbl>     <dbl>   <int>
## 1 apul         Acropora pulchra            1.39     1.14     8074
## 2 peve         Porites evermanni           1.47     1.12     8746
## 3 ptua         Pocillopora tuahiniensis    1.25     0.968    8152

Visualization: Exon-Level Variability

if(nrow(biomin_exon) > 0) {
  # Filter out extreme outliers for visualization
  exon_plot_data <- biomin_exon %>%
    filter(cv_expr < quantile(cv_expr, 0.95, na.rm = TRUE))
  
  p9 <- ggplot(exon_plot_data, aes(x = species, y = cv_expr, fill = species)) +
    geom_violin(alpha = 0.6) +
    geom_boxplot(width = 0.2, alpha = 0.8) +
    scale_fill_manual(values = species_colors) +
    labs(
      title = "Exon-Level Expression Variability in Biomineralization Genes",
      subtitle = "Proxy for alternative splicing / regulatory complexity",
      x = "Species",
      y = "CV Expression (Exon-Level)"
    ) +
    theme(legend.position = "none",
          axis.text.x = element_text(face = "italic"))
  
  print(p9)
  ggsave(file.path(output_dir, "exon_cv_violin.png"), p9, width = 10, height = 6, dpi = 300)
}


Summary and Hypothesis Evaluation

Compile Results Summary

# Create summary table
summary_table <- tibble(
  Metric = c(
    "Mean Methylation CV",
    "Mean GBM Level",
    "Mean Expression CV",
    "Remodeling Magnitude (TP2)"
  )
)

# Add species values
for(sp in c("apul", "peve", "ptua")) {
  summary_table[[species_lookup[sp]]] <- c(
    meth_variance_summary %>% filter(species_code == sp) %>% pull(mean_cv) %>% round(3),
    gbm_summary %>% filter(species_code == sp) %>% pull(grand_mean) %>% round(2),
    expr_var_summary %>% filter(species_code == sp) %>% pull(mean_cv) %>% round(3),
    remodeling_magnitude %>% filter(species_code == sp, timepoint == "TP2") %>% 
      pull(mean_abs_delta) %>% round(3)
  )
}

print(summary_table)
## # A tibble: 4 × 4
##   Metric           `Acropora pulchra` `Porites evermanni` Pocillopora tuahinie…¹
##   <chr>                         <dbl>               <dbl>                  <dbl>
## 1 Mean Methylatio…              0.598               0.617                  0.439
## 2 Mean GBM Level                7.05                6.71                   3.74 
## 3 Mean Expression…              0.871               0.918                  0.803
## 4 Remodeling Magn…              1.01                0.821                  0.2  
## # ℹ abbreviated name: ¹​`Pocillopora tuahiniensis`
# Save summary
write_csv(summary_table, file.path(output_dir, "hypothesis_summary.csv"))

Hypothesis Evaluation

cat("\n=== HYPOTHESIS EVALUATION ===\n\n")
## 
## === HYPOTHESIS EVALUATION ===
cat("Hypothesis 1: Acropora shows higher methylation variability (CV) than Porites\n")
## Hypothesis 1: Acropora shows higher methylation variability (CV) than Porites
apul_cv <- meth_variance_summary %>% filter(species_code == "apul") %>% pull(mean_cv)
peve_cv <- meth_variance_summary %>% filter(species_code == "peve") %>% pull(mean_cv)
cat(sprintf("  Acropora CV: %.3f | Porites CV: %.3f\n", apul_cv, peve_cv))
##   Acropora CV: 0.598 | Porites CV: 0.617
cat(sprintf("  Result: %s\n\n", ifelse(apul_cv > peve_cv, "SUPPORTED", "NOT SUPPORTED")))
##   Result: NOT SUPPORTED
cat("Hypothesis 2: Porites has higher baseline GBM (Frontloading)\n")
## Hypothesis 2: Porites has higher baseline GBM (Frontloading)
apul_gbm <- gbm_summary %>% filter(species_code == "apul") %>% pull(grand_mean)
peve_gbm <- gbm_summary %>% filter(species_code == "peve") %>% pull(grand_mean)
cat(sprintf("  Acropora GBM: %.2f | Porites GBM: %.2f\n", apul_gbm, peve_gbm))
##   Acropora GBM: 7.05 | Porites GBM: 6.71
cat(sprintf("  Result: %s\n\n", ifelse(peve_gbm > apul_gbm, "SUPPORTED", "NOT SUPPORTED")))
##   Result: NOT SUPPORTED
cat("Hypothesis 3: Acropora shows higher expression variability\n")
## Hypothesis 3: Acropora shows higher expression variability
apul_expr_cv <- expr_var_summary %>% filter(species_code == "apul") %>% pull(mean_cv)
peve_expr_cv <- expr_var_summary %>% filter(species_code == "peve") %>% pull(mean_cv)
cat(sprintf("  Acropora Expr CV: %.3f | Porites Expr CV: %.3f\n", apul_expr_cv, peve_expr_cv))
##   Acropora Expr CV: 0.871 | Porites Expr CV: 0.918
cat(sprintf("  Result: %s\n\n", ifelse(apul_expr_cv > peve_expr_cv, "SUPPORTED", "NOT SUPPORTED")))
##   Result: NOT SUPPORTED
cat("Hypothesis 4: Acropora shows greater methylation remodeling at TP2 (heat stress)\n")
## Hypothesis 4: Acropora shows greater methylation remodeling at TP2 (heat stress)
apul_tp2 <- remodeling_magnitude %>% filter(species_code == "apul", timepoint == "TP2") %>% pull(mean_abs_delta)
peve_tp2 <- remodeling_magnitude %>% filter(species_code == "peve", timepoint == "TP2") %>% pull(mean_abs_delta)
cat(sprintf("  Acropora TP2 |Δmeth|: %.3f | Porites TP2 |Δmeth|: %.3f\n", apul_tp2, peve_tp2))
##   Acropora TP2 |Δmeth|: 1.012 | Porites TP2 |Δmeth|: 0.821
cat(sprintf("  Result: %s\n", ifelse(apul_tp2 > peve_tp2, "SUPPORTED", "NOT SUPPORTED")))
##   Result: SUPPORTED

Combined Visualization

# Create combined summary figure
combined_plot <- plot_grid(
  p1 + theme(plot.title = element_text(size = 10)),
  p2 + theme(plot.title = element_text(size = 10)),
  p3 + theme(plot.title = element_text(size = 10)),
  p5 + theme(plot.title = element_text(size = 10)),
  ncol = 2,
  labels = c("A", "B", "C", "D")
)

combined_plot

ggsave(file.path(output_dir, "combined_hypothesis_figure.png"), 
       combined_plot, width = 14, height = 12, dpi = 300)

Session Info

sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] broom_1.0.11       cowplot_1.2.0      ggpubr_0.6.2       viridis_0.6.5     
##  [5] viridisLite_0.4.2  RColorBrewer_1.1-3 pheatmap_1.0.13    corrplot_0.95     
##  [9] lubridate_1.9.4    forcats_1.0.1      stringr_1.6.0      dplyr_1.1.4       
## [13] purrr_1.2.0        readr_2.1.6        tidyr_1.3.1        tibble_3.3.0      
## [17] ggplot2_4.0.1      tidyverse_2.0.0   
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6      xfun_0.54         bslib_0.9.0       rstatix_0.7.3    
##  [5] lattice_0.22-7    tzdb_0.5.0        vctrs_0.6.5       tools_4.5.1      
##  [9] generics_0.1.4    curl_7.0.0        parallel_4.5.1    pkgconfig_2.0.3  
## [13] Matrix_1.7-4      S7_0.2.1          lifecycle_1.0.4   compiler_4.5.1   
## [17] farver_2.1.2      textshaping_1.0.4 carData_3.0-5     htmltools_0.5.9  
## [21] sass_0.4.10       yaml_2.3.11       Formula_1.2-5     pillar_1.11.1    
## [25] car_3.1-3         crayon_1.5.3      jquerylib_0.1.4   cachem_1.1.0     
## [29] abind_1.4-8       nlme_3.1-168      tidyselect_1.2.1  digest_0.6.39    
## [33] stringi_1.8.7     splines_4.5.1     labeling_0.4.3    fastmap_1.2.0    
## [37] grid_4.5.1        cli_3.6.5         magrittr_2.0.4    utf8_1.2.6       
## [41] withr_3.0.2       scales_1.4.0      backports_1.5.0   bit64_4.6.0-1    
## [45] timechange_0.3.0  rmarkdown_2.30    bit_4.6.0         gridExtra_2.3    
## [49] ggsignif_0.6.4    ragg_1.5.0        hms_1.1.4         evaluate_1.0.5   
## [53] knitr_1.50        mgcv_1.9-4        rlang_1.1.6       glue_1.8.0       
## [57] rstudioapi_0.17.1 vroom_1.6.7       jsonlite_2.0.0    R6_2.6.1         
## [61] systemfonts_1.3.1