Overview

This analysis examines whether the same group_id (orthogroup) appears across multiple components with the same GO BP terms, which would indicate redundancy in the cross-component frequency counts.

Load Libraries

library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(knitr)

Define Paths

# Directory that is searched for the rank-35 per-component annotation CSVs
input_dir <- "/Users/sr320/Documents/GitHub/timeseries_molecular/M-multi-species/output/22-Visualizing-Rank-outs"

# Directory that receives the figures saved by this analysis
output_dir <- "/Users/sr320/Documents/GitHub/timeseries_molecular/M-multi-species/output/23-visualizing-rank35"

# Ensure the output location exists; parent folders are created as needed and
# an already-present directory is left alone
if (!dir.exists(output_dir)) {
  dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
}

Load All Data

# Get all rank_35 top100 annotation files
files <- list.files(
  input_dir,
  pattern = "rank_35_comp.*_top100_annotation\\.csv$",
  full.names = TRUE
)

# Fail fast when nothing matched (wrong path, missing upstream output);
# otherwise every later step would silently run on an empty table.
if (length(files) == 0) {
  stop(
    "No rank_35 top100 annotation files found in: ", input_dir,
    call. = FALSE
  )
}

# Sort files by component number. The component label ("comp<N>") is parsed
# from the file name and its numeric part is used for ordering, so comp2
# sorts before comp10.
file_df <- data.frame(
  path = files,
  component = str_extract(basename(files), "comp\\d+")
) %>%
  mutate(comp_num = as.numeric(str_extract(component, "\\d+"))) %>%
  arrange(comp_num)

# The list.files() pattern accepts any text after "comp", so a file without a
# numeric component id would get an NA label and be pooled with other such
# files in the per-component counts. Stop instead of mislabelling rows.
if (anyNA(file_df$comp_num)) {
  stop(
    "Could not parse a component number from: ",
    paste(basename(file_df$path[is.na(file_df$comp_num)]), collapse = ", "),
    call. = FALSE
  )
}

cat(sprintf("Loading data from %d files...\n", nrow(file_df)))
## Loading data from 35 files...
# Read all files and combine, tagging every row with the component label of
# the file it came from
all_data <- map2_df(file_df$path, file_df$component, function(path, comp) {
  read_csv(path, show_col_types = FALSE) %>%
    mutate(component = comp)
})

cat(sprintf("Total rows loaded: %d\n", nrow(all_data)))
## Total rows loaded: 3501
cat(sprintf("Unique group_ids: %d\n", n_distinct(all_data$group_id)))
## Unique group_ids: 475

Parse GO BP Terms with Group ID Tracking

# Expand the semicolon-delimited `go_bp` annotation into long format while
# keeping track of which group_id and component each term came from.
#
# Args:
#   data: data frame with at least the columns `group_id`, `component` and
#     `go_bp`. `go_bp` holds GO BP terms separated by ";"; a bracketed
#     "[GO:...]" suffix on a term is stripped.
#
# Returns:
#   A data frame with columns `group_id`, `component` and `go_term`, holding
#   each distinct (group_id, component, go_term) combination exactly once.
#   Rows with a missing or empty `go_bp`, and terms that are empty after
#   cleaning, are dropped.
expand_go_bp_terms <- function(data) {
  data %>%
    filter(!is.na(go_bp) & go_bp != "") %>%
    select(group_id, component, go_bp) %>%
    mutate(
      # Split GO terms by semicolon (one list element per term)
      go_terms = str_split(go_bp, ";")
    ) %>%
    unnest(go_terms) %>%
    mutate(
      # Clean up the term (remove the trailing "[GO:XXXXX]" part)
      go_term_clean = str_trim(go_terms),
      go_term_clean = str_replace(go_term_clean, "\\s*\\[GO:.*\\]", "")
    ) %>%
    filter(go_term_clean != "") %>%
    select(group_id, component, go_term = go_term_clean) %>%
    # Callers count rows per term and treat every row beyond the first per
    # group_id as "the same group_id in another component". A group_id listed
    # twice within one component, or a term repeated inside one `go_bp`
    # string, would otherwise be miscounted as cross-component redundancy.
    distinct()
}

# Long-format GO table: one row per group_id / component / GO term
go_expanded <- all_data %>%
  expand_go_bp_terms()

# Report the size of the expanded table
go_expanded %>%
  nrow() %>%
  sprintf("Total GO term occurrences: %d\n", .) %>%
  cat()
## Total GO term occurrences: 10831
go_expanded$go_term %>%
  n_distinct() %>%
  sprintf("Unique GO terms: %d\n", .) %>%
  cat()
## Unique GO terms: 1116

Analyze Redundancy

# Per-GO-term redundancy summary, most frequent terms first. Columns:
#   total_occurrences     - rows of go_expanded carrying the term
#   unique_group_ids      - distinct group_ids annotated with the term
#   n_components          - distinct components in which the term shows up
#   redundant_occurrences - occurrences beyond the first one per group_id
#                           (a group_id seen in 3 components contributes 2)
#   redundancy_pct        - redundant occurrences as a percentage of the total
#   avg_times_per_group   - mean number of occurrences per unique group_id
go_term_analysis <- go_expanded %>%
  group_by(go_term) %>%
  summarise(
    total_occurrences = n(),
    unique_group_ids = n_distinct(group_id),
    n_components = n_distinct(component),
    redundant_occurrences = total_occurrences - unique_group_ids,
    redundancy_pct = (redundant_occurrences / total_occurrences) * 100,
    avg_times_per_group = total_occurrences / unique_group_ids,
    .groups = "drop"
  ) %>%
  arrange(desc(total_occurrences))

# Show the 30 most frequent terms as a markdown table
cat("\n### Top 30 GO Terms with Redundancy Analysis\n\n")
## 
## ### Top 30 GO Terms with Redundancy Analysis
print(kable(slice_head(go_term_analysis, n = 30), digits = 2))
## 
## 
## |go_term                                                                           | total_occurrences| unique_group_ids| n_components| redundant_occurrences| redundancy_pct| avg_times_per_group|
## |:---------------------------------------------------------------------------------|-----------------:|----------------:|------------:|---------------------:|--------------:|-------------------:|
## |proteolysis                                                                       |                94|               11|           35|                    83|          88.30|                8.55|
## |positive regulation of transcription by RNA polymerase II                         |                91|               10|           33|                    81|          89.01|                9.10|
## |innate immune response                                                            |                88|                5|           35|                    83|          94.32|               17.60|
## |positive regulation of gene expression                                            |                86|               10|           33|                    76|          88.37|                8.60|
## |defense response to Gram-negative bacterium                                       |                81|                5|           35|                    76|          93.83|               16.20|
## |signal transduction                                                               |                79|                6|           35|                    73|          92.41|               13.17|
## |adenylate cyclase-activating G protein-coupled receptor signaling pathway         |                71|                4|           35|                    67|          94.37|               17.75|
## |visual perception                                                                 |                68|                6|           35|                    62|          91.18|               11.33|
## |gene expression                                                                   |                67|                6|           32|                    61|          91.04|               11.17|
## |cell surface receptor signaling pathway                                           |                66|                3|           35|                    63|          95.45|               22.00|
## |negative regulation of transcription by RNA polymerase II                         |                66|                9|           34|                    57|          86.36|                7.33|
## |negative regulation of apoptotic process                                          |                62|                7|           32|                    55|          88.71|                8.86|
## |positive regulation of reactive oxygen species biosynthetic process               |                58|                2|           35|                    56|          96.55|               29.00|
## |response to lipopolysaccharide                                                    |                57|                4|           28|                    53|          92.98|               14.25|
## |response to bacterium                                                             |                56|                4|           29|                    52|          92.86|               14.00|
## |cell adhesion                                                                     |                53|                6|           35|                    47|          88.68|                8.83|
## |cilium assembly                                                                   |                51|                8|           31|                    43|          84.31|                6.38|
## |flagellated sperm motility                                                        |                49|                7|           35|                    42|          85.71|                7.00|
## |regulation of transcription by RNA polymerase II                                  |                48|               10|           24|                    38|          79.17|                4.80|
## |toll-like receptor signaling pathway                                              |                48|                2|           30|                    46|          95.83|               24.00|
## |basement membrane organization                                                    |                46|                4|           27|                    42|          91.30|               11.50|
## |cell division                                                                     |                45|                8|           30|                    37|          82.22|                5.62|
## |mRNA processing                                                                   |                44|                6|           27|                    38|          86.36|                7.33|
## |negative regulation of tumor necrosis factor production                           |                44|                2|           28|                    42|          95.45|               22.00|
## |actin cytoskeleton organization                                                   |                42|                3|           27|                    39|          92.86|               14.00|
## |negative regulation of cell population proliferation                              |                41|                4|           35|                    37|          90.24|               10.25|
## |negative regulation of transforming growth factor beta receptor signaling pathway |                41|                3|           34|                    38|          92.68|               13.67|
## |muscle organ development                                                          |                40|                3|           35|                    37|          92.50|               13.33|
## |in utero embryonic development                                                    |                39|                6|           27|                    33|          84.62|                6.50|
## |negative regulation of angiogenesis                                               |                39|                2|           35|                    37|          94.87|               19.50|

Detailed Analysis: Top Terms

# Focus on top 30 terms from previous analysis
top_30_terms <- head(go_term_analysis$go_term, 30)

# For each (GO term, group_id) pair among the top terms, record how many
# components the pair occurs in and which ones; keep only pairs that occur in
# more than one component.
redundancy_details <- go_expanded %>%
  filter(go_term %in% top_30_terms) %>%
  group_by(go_term, group_id) %>%
  summarise(
    n_components = n_distinct(component),
    components = paste(sort(unique(component)), collapse = ", "),
    .groups = "drop"
  ) %>%
  filter(n_components > 1) %>%
  arrange(go_term, desc(n_components))

# Rows are (go_term, group_id) pairs, not group_ids: a group_id annotated with
# several top terms contributes one row per term, so the row count must not be
# reported as a number of group_ids.
cat(sprintf(
  "\nFound %d (go_term, group_id) pairs in which the group_id appears in multiple components\n",
  nrow(redundancy_details)
))
## 
## Found 123 (go_term, group_id) pairs in which the group_id appears in multiple components
cat("Across the top 30 GO terms\n\n")
## Across the top 30 GO terms
# Show examples
if (nrow(redundancy_details) > 0) {
  cat("### Examples of Group IDs Appearing in Multiple Components\n\n")
  print(kable(head(redundancy_details, 20)))
}
## ### Examples of Group IDs Appearing in Multiple Components
## 
## 
## 
## |go_term                                                                   |group_id | n_components|components                                                                                                                                                                                                                                                                    |
## |:-------------------------------------------------------------------------|:--------|------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
## |actin cytoskeleton organization                                           |OG_01645 |           26|comp1, comp11, comp13, comp14, comp15, comp16, comp18, comp2, comp20, comp21, comp22, comp24, comp25, comp26, comp27, comp28, comp3, comp30, comp31, comp32, comp34, comp4, comp6, comp7, comp8, comp9                                                                        |
## |actin cytoskeleton organization                                           |OG_08222 |           15|comp1, comp11, comp13, comp15, comp16, comp2, comp22, comp25, comp27, comp3, comp31, comp4, comp6, comp7, comp8                                                                                                                                                               |
## |adenylate cyclase-activating G protein-coupled receptor signaling pathway |OG_02537 |           35|comp1, comp10, comp11, comp12, comp13, comp14, comp15, comp16, comp17, comp18, comp19, comp2, comp20, comp21, comp22, comp23, comp24, comp25, comp26, comp27, comp28, comp29, comp3, comp30, comp31, comp32, comp33, comp34, comp35, comp4, comp5, comp6, comp7, comp8, comp9 |
## |adenylate cyclase-activating G protein-coupled receptor signaling pathway |OG_02927 |           29|comp1, comp10, comp11, comp13, comp14, comp15, comp16, comp18, comp2, comp20, comp21, comp22, comp23, comp24, comp25, comp26, comp27, comp28, comp29, comp30, comp31, comp32, comp33, comp34, comp4, comp6, comp7, comp8, comp9                                               |
## |adenylate cyclase-activating G protein-coupled receptor signaling pathway |OG_07351 |            6|comp12, comp16, comp17, comp3, comp30, comp34                                                                                                                                                                                                                                 |
## |basement membrane organization                                            |OG_01645 |           26|comp1, comp11, comp13, comp14, comp15, comp16, comp18, comp2, comp20, comp21, comp22, comp24, comp25, comp26, comp27, comp28, comp3, comp30, comp31, comp32, comp34, comp4, comp6, comp7, comp8, comp9                                                                        |
## |basement membrane organization                                            |OG_02338 |           15|comp11, comp13, comp15, comp16, comp2, comp20, comp21, comp28, comp30, comp34, comp4, comp6, comp7, comp8, comp9                                                                                                                                                              |
## |basement membrane organization                                            |OG_03906 |            4|comp16, comp17, comp22, comp34                                                                                                                                                                                                                                                |
## |cell adhesion                                                             |OG_02537 |           35|comp1, comp10, comp11, comp12, comp13, comp14, comp15, comp16, comp17, comp18, comp19, comp2, comp20, comp21, comp22, comp23, comp24, comp25, comp26, comp27, comp28, comp29, comp3, comp30, comp31, comp32, comp33, comp34, comp35, comp4, comp5, comp6, comp7, comp8, comp9 |
## |cell adhesion                                                             |OG_08313 |            7|comp11, comp15, comp16, comp2, comp22, comp28, comp4                                                                                                                                                                                                                          |
## |cell adhesion                                                             |OG_04601 |            6|comp11, comp15, comp16, comp2, comp22, comp4                                                                                                                                                                                                                                  |
## |cell adhesion                                                             |OG_06280 |            2|comp29, comp35                                                                                                                                                                                                                                                                |
## |cell adhesion                                                             |OG_09347 |            2|comp17, comp30                                                                                                                                                                                                                                                                |
## |cell division                                                             |OG_01645 |           26|comp1, comp11, comp13, comp14, comp15, comp16, comp18, comp2, comp20, comp21, comp22, comp24, comp25, comp26, comp27, comp28, comp3, comp30, comp31, comp32, comp34, comp4, comp6, comp7, comp8, comp9                                                                        |
## |cell division                                                             |OG_08313 |            7|comp11, comp15, comp16, comp2, comp22, comp28, comp4                                                                                                                                                                                                                          |
## |cell division                                                             |OG_04601 |            6|comp11, comp15, comp16, comp2, comp22, comp4                                                                                                                                                                                                                                  |
## |cell division                                                             |OG_07762 |            2|comp12, comp17                                                                                                                                                                                                                                                                |
## |cell surface receptor signaling pathway                                   |OG_02537 |           35|comp1, comp10, comp11, comp12, comp13, comp14, comp15, comp16, comp17, comp18, comp19, comp2, comp20, comp21, comp22, comp23, comp24, comp25, comp26, comp27, comp28, comp29, comp3, comp30, comp31, comp32, comp33, comp34, comp35, comp4, comp5, comp6, comp7, comp8, comp9 |
## |cell surface receptor signaling pathway                                   |OG_02927 |           29|comp1, comp10, comp11, comp13, comp14, comp15, comp16, comp18, comp2, comp20, comp21, comp22, comp23, comp24, comp25, comp26, comp27, comp28, comp29, comp30, comp31, comp32, comp33, comp34, comp4, comp6, comp7, comp8, comp9                                               |
## |cell surface receptor signaling pathway                                   |OG_09347 |            2|comp17, comp30                                                                                                                                                                                                                                                                |

Summary Statistics

# Headline numbers for the whole data set, as a two-column Metric / Value table
overall_stats <- local({
  occurrence_total <- nrow(go_expanded)
  redundant_total <- sum(go_term_analysis$redundant_occurrences)

  # Named vector: names become the Metric column, values the Value column
  metric_values <- c(
    "Total GO term occurrences" = occurrence_total,
    "Unique GO terms" = n_distinct(go_expanded$go_term),
    "Unique group_ids with GO terms" = n_distinct(go_expanded$group_id),
    "Redundant occurrences (same group_id in multiple components)" = redundant_total,
    "Overall redundancy percentage" = round(redundant_total / occurrence_total * 100, 2)
  )

  data.frame(
    Metric = names(metric_values),
    Value = unname(metric_values)
  )
})

cat("\n### Overall Statistics\n\n")
## 
## ### Overall Statistics
print(kable(overall_stats))
## 
## 
## |Metric                                                       |    Value|
## |:------------------------------------------------------------|--------:|
## |Total GO term occurrences                                    | 10831.00|
## |Unique GO terms                                              |  1116.00|
## |Unique group_ids with GO terms                               |   168.00|
## |Redundant occurrences (same group_id in multiple components) |  9324.00|
## |Overall redundancy percentage                                |    86.09|
# Aggregate the 30 most frequent terms: summed occurrence counts plus the
# unweighted mean of the per-term redundancy percentages
top_30_stats <- go_term_analysis %>%
  head(30) %>%
  summarise(
    across(c(total_occurrences, redundant_occurrences), sum),
    avg_redundancy_pct = mean(redundancy_pct)
  )

cat("\n### Top 30 Terms Statistics\n\n")
## 
## ### Top 30 Terms Statistics
cat(sprintf("Total occurrences: %d\n", top_30_stats[["total_occurrences"]]))
## Total occurrences: 1760
cat(sprintf("Redundant occurrences: %d\n", top_30_stats[["redundant_occurrences"]]))
## Redundant occurrences: 1594
# Pooled percentage (redundant / total over all 30 terms together)
cat(sprintf(
  "Redundancy percentage: %.2f%%\n",
  with(top_30_stats, redundant_occurrences / total_occurrences * 100)
))
## Redundancy percentage: 90.57%
# Mean of the individual per-term percentages
cat(sprintf("Average redundancy per term: %.2f%%\n", top_30_stats[["avg_redundancy_pct"]]))
## Average redundancy per term: 90.42%

Visualization: Redundancy Analysis

# Long-format input for the stacked bar chart: two rows per top-30 term, one
# for the unique group_id count and one for the redundant occurrence count
plot_data <- go_term_analysis %>%
  head(30) %>%
  # Reverse the level order so the most frequent term ends up at the top
  # once the axes are flipped
  mutate(go_term = factor(go_term, levels = rev(go_term))) %>%
  pivot_longer(
    cols = c(unique_group_ids, redundant_occurrences),
    names_to = "type",
    values_to = "count"
  ) %>%
  # Swap the column names for the legend labels
  mutate(
    type = if_else(
      type == "unique_group_ids",
      "Unique group_ids",
      "Redundant (same group_id)"
    )
  )

# Fill colours keyed by legend label
occurrence_colours <- c(
  "Unique group_ids" = "steelblue",
  "Redundant (same group_id)" = "orange"
)

# Stacked horizontal bars: unique group_ids plus redundant occurrences per term
p1 <- plot_data %>%
  ggplot(aes(x = go_term, y = count, fill = type)) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_fill_manual(values = occurrence_colours, name = "Occurrence Type") +
  labs(
    title = "Redundancy Analysis: Top 30 GO BP Terms",
    subtitle = "Blue = unique group_ids, Orange = redundant occurrences (same group_id in multiple components)",
    x = "GO Biological Process Term",
    y = "Count"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray40"),
    axis.text.y = element_text(size = 10),
    legend.position = "bottom"
  )

print(p1)

# Write the figure to the output directory
ggsave(
  file.path(output_dir, "rank35_GO_redundancy_analysis.png"),
  plot = p1,
  width = 14,
  height = 10,
  dpi = 300
)

Redundancy Percentage Plot

# Horizontal bar chart of the redundancy percentage for the 30 most frequent
# GO terms, with each bar labelled by its value.
p2 <- go_term_analysis %>%
  slice_head(n = 30) %>%
  # Reverse the level order so the most frequent term is drawn at the top
  # after coord_flip()
  mutate(go_term = factor(go_term, levels = rev(go_term))) %>%
  ggplot(aes(x = go_term, y = redundancy_pct)) +
  geom_col(fill = "coral", alpha = 0.8) +
  # hjust < 0 places each label just beyond the end of its bar
  geom_text(aes(label = sprintf("%.1f%%", redundancy_pct)),
            hjust = -0.1, size = 3) +
  # The labels extend past the largest bars, so widen the upper end of the
  # value axis; with the default expansion they can be cut off at the panel
  # edge. The lower end keeps the default 5% expansion.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  coord_flip() +
  labs(
    title = "Redundancy Percentage: Top 30 GO BP Terms",
    subtitle = "Percentage of occurrences due to same group_id appearing in multiple components",
    x = "GO Biological Process Term",
    y = "Redundancy Percentage (%)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray40"),
    axis.text.y = element_text(size = 10),
    panel.grid.major.y = element_blank()
  )

print(p2)

# Write the figure to the output directory
ggsave(
  filename = file.path(output_dir, "rank35_GO_redundancy_percentage.png"),
  plot = p2,
  width = 14,
  height = 10,
  dpi = 300
)

Group IDs Appearing in Most Components

# Rank group_ids by the number of components they occur in and keep the first
# 50. For each group_id also record how many distinct GO terms it carries and
# a sample made of its first three distinct terms.
group_id_frequency <- go_expanded %>%
  group_by(group_id) %>%
  summarise(
    n_components = n_distinct(component),
    n_go_terms = n_distinct(go_term),
    go_terms_sample = paste(head(unique(go_term), n = 3), collapse = "; "),
    .groups = "drop"
  ) %>%
  arrange(desc(n_components)) %>%
  slice_head(n = 50)

cat("\n### Top 50 Group IDs by Component Frequency\n\n")
## 
## ### Top 50 Group IDs by Component Frequency
# The table is already limited to 50 rows, so it is printed in full
print(kable(group_id_frequency))
## 
## 
## |group_id | n_components| n_go_terms|go_terms_sample                                                                                                                                                                                               |
## |:--------|------------:|----------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
## |OG_00264 |           35|          1|flagellated sperm motility                                                                                                                                                                                    |
## |OG_01620 |           35|          4|retinoid metabolic process; retinol metabolic process; steroid metabolic process                                                                                                                              |
## |OG_02537 |           35|         22|adenylate cyclase-activating G protein-coupled receptor signaling pathway; apoptotic cell clearance; axonogenesis                                                                                             |
## |OG_01636 |           34|          3|negative regulation of activin receptor signaling pathway; negative regulation of BMP signaling pathway; negative regulation of transforming growth factor beta receptor signaling pathway                    |
## |OG_01919 |           31|          6|diacylglycerol catabolic process; phosphatidylcholine catabolic process; phosphatidylethanolamine catabolic process                                                                                           |
## |OG_02927 |           29|          5|adenylate cyclase-activating G protein-coupled receptor signaling pathway; cell surface receptor signaling pathway; circadian behavior                                                                        |
## |OG_03269 |           29|          1|proteolysis                                                                                                                                                                                                   |
## |OG_07218 |           27|          4|Notch signaling pathway; protein ubiquitination; regulation of postsynaptic neurotransmitter receptor internalization                                                                                         |
## |OG_08311 |           27|         11|cilium assembly; determination of heart left/right asymmetry; notochord morphogenesis                                                                                                                         |
## |OG_01645 |           26|          7|actin cytoskeleton organization; basement membrane organization; cell division                                                                                                                                |
## |OG_02341 |           26|          7|cellular response to stress; G protein-coupled glutamate receptor signaling pathway; gene expression                                                                                                          |
## |OG_02998 |           25|          5|intracellular signal transduction; negative regulation of apoptotic process; positive regulation of transcription by RNA polymerase II                                                                        |
## |OG_02929 |           24|          8|atrial cardiac muscle cell development; heart trabecula formation; negative regulation of apoptotic process                                                                                                   |
## |OG_03335 |           23|          1|negative regulation of epidermal growth factor receptor signaling pathway                                                                                                                                     |
## |OG_04437 |           23|         31|activation of NF-kappaB-inducing kinase activity; cellular response to amyloid-beta; cellular response to diacyl bacterial lipopeptide                                                                        |
## |OG_04516 |           23|          1|protein localization to nucleus                                                                                                                                                                               |
## |OG_00643 |           22|          1|proteolysis                                                                                                                                                                                                   |
## |OG_08098 |           21|         54|cell population proliferation; cellular response to angiotensin; cellular response to cadmium ion                                                                                                             |
## |OG_06663 |           19|         26|adherens junction maintenance; cell-matrix adhesion; focal adhesion assembly                                                                                                                                  |
## |OG_00622 |           18|          8|blastocyst development; columnar/cuboidal epithelial cell differentiation; defense response to Gram-negative bacterium                                                                                        |
## |OG_03122 |           18|          4|chloride transport; regulation of cardiac muscle contraction by regulation of the release of sequestered calcium ion; regulation of release of sequestered calcium ion into cytosol by sarcoplasmic reticulum |
## |OG_01101 |           16|          2|microtubule bundle formation; protein modification process                                                                                                                                                    |
## |OG_02936 |           16|          2|CDP biosynthetic process; UDP biosynthetic process                                                                                                                                                            |
## |OG_04538 |           16|          2|antibacterial innate immune response; defense response to Gram-negative bacterium                                                                                                                             |
## |OG_05198 |           16|         23|antiviral innate immune response; cellular response to exogenous dsRNA; cytoplasmic pattern recognition receptor signaling pathway                                                                            |
## |OG_02338 |           15|         78|antigen processing and presentation; axon guidance; basement membrane organization                                                                                                                            |
## |OG_08222 |           15|          2|actin cytoskeleton organization; actin nucleation                                                                                                                                                             |
## |OG_07129 |           14|          8|inner ear development; intracellular triglyceride homeostasis; negative regulation of mitochondrial calcium ion concentration                                                                                 |
## |OG_08842 |           14|          1|sensory perception of sound                                                                                                                                                                                   |
## |OG_07963 |           13|          3|bradykinin catabolic process; negative regulation of programmed cell death; proteolysis                                                                                                                       |
## |OG_00225 |           12|          9|citrate metabolic process; intestinal absorption; intracellular iron ion homeostasis                                                                                                                          |
## |OG_06323 |           12|          5|DNA double-strand break processing; double-strand break repair; double-strand break repair via homologous recombination                                                                                       |
## |OG_06909 |           12|          6|apoptotic process; chitin catabolic process; immune system process                                                                                                                                            |
## |OG_06279 |           11|          8|blastocyst development; columnar/cuboidal epithelial cell differentiation; defense response to Gram-negative bacterium                                                                                        |
## |OG_08952 |           11|          7|auditory receptor cell stereocilium organization; cilium assembly; determination of left/right symmetry                                                                                                       |
## |OG_07947 |           10|          8|cellular response to glucose stimulus; cellular response to tumor necrosis factor; gluconeogenesis                                                                                                            |
## |OG_01210 |            9|         12|blood circulation; defense response to bacterium; immune system process                                                                                                                                       |
## |OG_04857 |            9|         49|adult heart development; apoptotic process involved in heart morphogenesis; atrial cardiac muscle tissue development                                                                                          |
## |OG_04999 |            9|          8|camera-type eye development; circadian regulation of gene expression; embryonic retina morphogenesis in camera-type eye                                                                                       |
## |OG_05636 |            9|          2|cellular response to nerve growth factor stimulus; endocytic recycling                                                                                                                                        |
## |OG_07887 |            9|          9|innate immune response; pattern recognition receptor signaling pathway; positive regulation of inflammatory response                                                                                          |
## |OG_08392 |            9|          5|cartilage development; cell differentiation; ossification                                                                                                                                                     |
## |OG_10182 |            9|          1|regulation of dendrite development                                                                                                                                                                            |
## |OG_00797 |            8|          1|tight junction assembly                                                                                                                                                                                       |
## |OG_07103 |            8|          1|unsaturated fatty acid biosynthetic process                                                                                                                                                                   |
## |OG_07200 |            8|          4|heart development; mRNA processing; regulation of translation                                                                                                                                                 |
## |OG_10186 |            8|          8|epidermis development; gene expression; lymph circulation                                                                                                                                                     |
## |OG_01396 |            7|          8|chaperone cofactor-dependent protein refolding; clathrin coat disassembly; mRNA processing                                                                                                                    |
## |OG_08313 |            7|         15|cell adhesion; cell division; cell migration                                                                                                                                                                  |
## |OG_09440 |            7|          7|cell differentiation; hematopoietic stem cell proliferation; mesenchymal cell apoptotic process                                                                                                               |
# Horizontal bar chart of the first 30 rows of `group_id_frequency`
# (rows are used in their existing order): one bar per group ID, with bar
# length given by n_components.
p3 <- group_id_frequency %>%
  head(30) %>%
  # Reverse the level order so that, once the axes are flipped, the first
  # row of the table is drawn as the top bar.
  mutate(group_id = factor(group_id, levels = rev(group_id))) %>%
  ggplot(aes(x = group_id, y = n_components)) +
  # Bar lengths are taken directly from the n_components values
  geom_col(fill = "darkgreen", alpha = 0.7) +
  # Write each count just beyond the end of its bar
  geom_text(aes(label = n_components), hjust = -0.2, size = 3) +
  coord_flip() +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 9),
    plot.title = element_text(size = 14, face = "bold")
  ) +
  labs(
    x = "Group ID",
    y = "Number of Components",
    title = "Top 30 Group IDs by Number of Components",
    subtitle = "Group IDs that appear in the most components"
  )

# Show the figure in the rendered report
print(p3)

# Save the same figure to the output directory as a 12 x 8 in, 300 dpi PNG
ggsave(
  file.path(output_dir, "rank35_most_frequent_group_ids.png"),
  plot = p3,
  width = 12,
  height = 8,
  dpi = 300
)

Export Detailed Results

# Write each result table to its own CSV file inside `output_dir`.
# List names are the output file names; tables are written in list order:
#   1. full redundancy analysis
#   2. group IDs appearing in multiple components
#   3. group ID frequency across components
iwalk(
  list(
    "rank35_GO_redundancy_full_analysis.csv" = go_term_analysis,
    "rank35_redundant_group_ids_details.csv" = redundancy_details,
    "rank35_group_id_component_frequency.csv" = group_id_frequency
  ),
  function(result_table, csv_name) {
    write_csv(result_table, file.path(output_dir, csv_name))
  }
)

cat("\nExported detailed results to CSV files\n")
## 
## Exported detailed results to CSV files

Session Info

# Print the R version, platform, locale, and attached/loaded package versions
# that were in effect for this run.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.7.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] C
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.48         RColorBrewer_1.1-3 lubridate_1.9.3    forcats_1.0.0     
##  [5] stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2        readr_2.1.5       
##  [9] tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0   
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9        utf8_1.2.4        generics_0.1.3    stringi_1.8.4    
##  [5] hms_1.1.3         digest_0.6.37     magrittr_2.0.3    evaluate_1.0.0   
##  [9] grid_4.3.2        timechange_0.3.0  fastmap_1.2.0     jsonlite_1.8.9   
## [13] fansi_1.0.6       scales_1.3.0      textshaping_0.4.0 jquerylib_0.1.4  
## [17] cli_3.6.3         rlang_1.1.4       crayon_1.5.3      bit64_4.5.2      
## [21] munsell_0.5.1     withr_3.0.1       cachem_1.1.0      yaml_2.3.10      
## [25] tools_4.3.2       parallel_4.3.2    tzdb_0.4.0        colorspace_2.1-1 
## [29] vctrs_0.6.5       R6_2.5.1          lifecycle_1.0.4   bit_4.5.0        
## [33] vroom_1.6.5       ragg_1.3.3        pkgconfig_2.0.3   pillar_1.9.0     
## [37] bslib_0.8.0       gtable_0.3.6      glue_1.8.0        systemfonts_1.1.0
## [41] highr_0.11        xfun_0.48         tidyselect_1.2.1  farver_2.1.2     
## [45] htmltools_0.5.8.1 rmarkdown_2.28    labeling_0.4.3    compiler_4.3.2