Overview

This analysis examines whether the same group_id (orthogroup) appears across multiple components with the same GO BP terms, which would indicate redundancy in the cross-component frequency counts.

Load Libraries

library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(knitr)

Define Paths

# Folder that is searched for the rank_05 annotation CSV files
input_dir <- "/Users/sr320/Documents/GitHub/timeseries_molecular/M-multi-species/output/22-Visualizing-Rank-outs"

# Folder that receives the figures saved by this analysis
output_dir <- "/Users/sr320/Documents/GitHub/timeseries_molecular/M-multi-species/output/24-visualizing-rank05"

# Ensure the output folder exists: missing parent folders are created too, and
# the warning normally raised for an already-existing folder is suppressed
dir.create(path = output_dir, recursive = TRUE, showWarnings = FALSE)

Load All Data

# Locate every rank_05 top100 annotation file in the input directory
files <- list.files(
  input_dir,
  pattern = "rank_05_comp.*_top100_annotation\\.csv$",
  full.names = TRUE
)

# Fail early with a clear message. Without this guard an empty file list
# yields an empty table and an unrelated "column not found" error later on.
if (length(files) == 0) {
  stop(
    "No rank_05 top100 annotation files found in: ", input_dir,
    call. = FALSE
  )
}

# Pair each path with its component label (e.g. "comp3") and order the files
# by the numeric part of that label rather than alphabetically
file_df <- data.frame(
  path = files,
  component = str_extract(basename(files), "comp\\d+")
) %>%
  mutate(comp_num = as.numeric(str_extract(component, "\\d+"))) %>%
  arrange(comp_num)

cat(sprintf("Loading data from %d files...\n", nrow(file_df)))
## Loading data from 5 files...
# Read each file, tag its rows with the component it came from, and stack
# the per-file tables into one data frame
all_data <- map2_df(file_df$path, file_df$component, function(path, comp) {
  read_csv(path, show_col_types = FALSE) %>%
    mutate(component = comp)
})

cat(sprintf("Total rows loaded: %d\n", nrow(all_data)))
## Total rows loaded: 500
cat(sprintf("Unique group_ids: %d\n", n_distinct(all_data$group_id)))
## Unique group_ids: 240

Parse GO BP Terms with Group ID Tracking

# Turn the semicolon-delimited go_bp annotation of each row into one row per
# GO term, keeping the group_id and component the term came from.
#
# data:    data frame with columns group_id, component and go_bp
# returns: data frame with columns group_id, component and go_term; rows whose
#          go_bp is missing or empty, and terms that end up empty, are dropped
expand_go_bp_terms <- function(data) {
  annotated <- data %>%
    filter(!is.na(go_bp), go_bp != "") %>%
    select(group_id, component, go_bp)

  # One row per semicolon-separated piece of go_bp
  per_term <- annotated %>%
    mutate(go_terms = str_split(go_bp, ";")) %>%
    unnest(go_terms)

  per_term %>%
    mutate(
      # Trim surrounding whitespace, then strip the bracketed "[GO:...]" part
      go_term_clean = str_remove(str_trim(go_terms), "\\s*\\[GO:.*\\]")
    ) %>%
    filter(go_term_clean != "") %>%
    select(group_id, component, go_term = go_term_clean)
}

# Long-format table: one row per GO term listed for a group_id in a component
go_expanded <- all_data %>%
  expand_go_bp_terms()

# Report the number of term rows and the number of distinct terms
cat(sprintf(fmt = "Total GO term occurrences: %d\n", nrow(go_expanded)))
## Total GO term occurrences: 2290
cat(sprintf(fmt = "Unique GO terms: %d\n", n_distinct(pull(go_expanded, go_term))))
## Unique GO terms: 833

Analyze Redundancy

# Per-GO-term summary across all components:
#   total_occurrences      rows carrying the term
#   unique_group_ids       distinct group_ids carrying the term
#   n_components           distinct components in which the term occurs
#   redundant_occurrences  occurrences beyond one per group_id
#                          (a group_id seen in 3 components contributes 2)
#   redundancy_pct         redundant occurrences as a percentage of the total
#   avg_times_per_group    occurrences per distinct group_id
go_term_analysis <- go_expanded %>%
  group_by(go_term) %>%
  summarise(
    total_occurrences = n(),
    unique_group_ids = n_distinct(group_id),
    n_components = n_distinct(component),
    redundant_occurrences = total_occurrences - unique_group_ids,
    redundancy_pct = (redundant_occurrences / total_occurrences) * 100,
    avg_times_per_group = total_occurrences / unique_group_ids
  ) %>%
  arrange(desc(total_occurrences))

# Print the 30 most frequent terms as a table
cat("\n### Top 30 GO Terms with Redundancy Analysis\n\n")
## 
## ### Top 30 GO Terms with Redundancy Analysis
go_term_analysis %>%
  head(30) %>%
  kable(digits = 2) %>%
  print()
## 
## 
## |go_term                                                                     | total_occurrences| unique_group_ids| n_components| redundant_occurrences| redundancy_pct| avg_times_per_group|
## |:---------------------------------------------------------------------------|-----------------:|----------------:|------------:|---------------------:|--------------:|-------------------:|
## |negative regulation of transcription by RNA polymerase II                   |                28|               12|            5|                    16|          57.14|                2.33|
## |regulation of transcription by RNA polymerase II                            |                26|               12|            5|                    14|          53.85|                2.17|
## |positive regulation of transcription by RNA polymerase II                   |                23|               13|            5|                    10|          43.48|                1.77|
## |positive regulation of DNA-templated transcription                          |                19|               10|            5|                     9|          47.37|                1.90|
## |positive regulation of gene expression                                      |                17|                8|            5|                     9|          52.94|                2.12|
## |nervous system development                                                  |                16|                7|            5|                     9|          56.25|                2.29|
## |cell division                                                               |                15|                7|            5|                     8|          53.33|                2.14|
## |cell population proliferation                                               |                15|                6|            5|                     9|          60.00|                2.50|
## |Wnt signaling pathway                                                       |                14|                6|            5|                     8|          57.14|                2.33|
## |negative regulation of apoptotic process                                    |                14|                7|            5|                     7|          50.00|                2.00|
## |kidney development                                                          |                12|                4|            4|                     8|          66.67|                3.00|
## |regulation of DNA-templated transcription                                   |                11|                4|            5|                     7|          63.64|                2.75|
## |basement membrane organization                                              |                10|                5|            5|                     5|          50.00|                2.00|
## |cellular response to leukemia inhibitory factor                             |                10|                3|            4|                     7|          70.00|                3.33|
## |homophilic cell adhesion via plasma membrane adhesion molecules             |                10|                5|            5|                     5|          50.00|                2.00|
## |proteolysis                                                                 |                10|                5|            5|                     5|          50.00|                2.00|
## |regulation of cell cycle                                                    |                10|                3|            4|                     7|          70.00|                3.33|
## |roof of mouth development                                                   |                10|                5|            4|                     5|          50.00|                2.00|
## |sensory perception of sound                                                 |                10|                3|            5|                     7|          70.00|                3.33|
## |negative regulation of cell population proliferation                        |                 9|                5|            5|                     4|          44.44|                1.80|
## |neuron projection morphogenesis                                             |                 9|                3|            5|                     6|          66.67|                3.00|
## |retina layer formation                                                      |                 9|                3|            5|                     6|          66.67|                3.00|
## |sprouting angiogenesis                                                      |                 9|                3|            4|                     6|          66.67|                3.00|
## |DNA repair                                                                  |                 8|                4|            5|                     4|          50.00|                2.00|
## |branching involved in ureteric bud morphogenesis                            |                 8|                3|            5|                     5|          62.50|                2.67|
## |defense response to bacterium                                               |                 8|                2|            4|                     6|          75.00|                4.00|
## |extracellular matrix organization                                           |                 8|                3|            5|                     5|          62.50|                2.67|
## |heterophilic cell-cell adhesion via plasma membrane cell adhesion molecules |                 8|                3|            5|                     5|          62.50|                2.67|
## |intracellular signal transduction                                           |                 8|                5|            4|                     3|          37.50|                1.60|
## |positive regulation of mesenchymal stem cell proliferation                  |                 8|                3|            5|                     5|          62.50|                2.67|

Detailed Analysis: Top Terms

# Restrict the detailed view to the 30 most frequent GO terms
top_30_terms <- head(go_term_analysis$go_term, 30)

# For every (GO term, group_id) pair among those terms, record the components
# in which the pair occurs and keep only pairs seen in more than one component
redundancy_details <- go_expanded %>%
  filter(go_term %in% top_30_terms) %>%
  group_by(go_term, group_id) %>%
  summarise(
    n_components = n_distinct(component),
    components = paste(sort(unique(component)), collapse = ", "),
    .groups = "drop"
  ) %>%
  filter(n_components > 1) %>%
  arrange(go_term, desc(n_components))

# Each row is a (GO term, group_id) pair, and one group_id can be listed under
# several terms, so the row count is reported as pairs rather than group_ids
cat(sprintf(
  "\nFound %d GO term / group_id pairs where the group_id appears in multiple components\n",
  nrow(redundancy_details)
))
## 
## Found 88 GO term / group_id pairs where the group_id appears in multiple components
cat("Across the top 30 GO terms\n\n")
## Across the top 30 GO terms
# Show examples (at most the first 50 pairs)
if (nrow(redundancy_details) > 0) {
  cat("### Examples of Group IDs Appearing in Multiple Components\n\n")
  print(kable(head(redundancy_details, 50)))
}
## ### Examples of Group IDs Appearing in Multiple Components
## 
## 
## 
## |go_term                                                                     |group_id | n_components|components                 |
## |:---------------------------------------------------------------------------|:--------|------------:|:--------------------------|
## |DNA repair                                                                  |OG_04129 |            4|comp1, comp2, comp4, comp5 |
## |DNA repair                                                                  |OG_09650 |            2|comp1, comp4               |
## |Wnt signaling pathway                                                       |OG_04737 |            4|comp1, comp2, comp4, comp5 |
## |Wnt signaling pathway                                                       |OG_07762 |            4|comp1, comp2, comp4, comp5 |
## |Wnt signaling pathway                                                       |OG_01366 |            2|comp4, comp5               |
## |Wnt signaling pathway                                                       |OG_03779 |            2|comp3, comp5               |
## |basement membrane organization                                              |OG_02599 |            4|comp1, comp2, comp4, comp5 |
## |basement membrane organization                                              |OG_01941 |            3|comp1, comp2, comp4        |
## |branching involved in ureteric bud morphogenesis                            |OG_01489 |            4|comp1, comp2, comp4, comp5 |
## |branching involved in ureteric bud morphogenesis                            |OG_10087 |            3|comp1, comp2, comp4        |
## |cell division                                                               |OG_07762 |            4|comp1, comp2, comp4, comp5 |
## |cell division                                                               |OG_01941 |            3|comp1, comp2, comp4        |
## |cell division                                                               |OG_05776 |            3|comp2, comp4, comp5        |
## |cell division                                                               |OG_03926 |            2|comp4, comp5               |
## |cell population proliferation                                               |OG_02599 |            4|comp1, comp2, comp4, comp5 |
## |cell population proliferation                                               |OG_05208 |            4|comp1, comp2, comp4, comp5 |
## |cell population proliferation                                               |OG_06522 |            4|comp1, comp2, comp4, comp5 |
## |cellular response to leukemia inhibitory factor                             |OG_00887 |            4|comp1, comp2, comp4, comp5 |
## |cellular response to leukemia inhibitory factor                             |OG_05395 |            4|comp1, comp2, comp4, comp5 |
## |cellular response to leukemia inhibitory factor                             |OG_01366 |            2|comp4, comp5               |
## |defense response to bacterium                                               |OG_01210 |            4|comp1, comp2, comp4, comp5 |
## |defense response to bacterium                                               |OG_04170 |            4|comp1, comp2, comp4, comp5 |
## |extracellular matrix organization                                           |OG_04737 |            4|comp1, comp2, comp4, comp5 |
## |extracellular matrix organization                                           |OG_10087 |            3|comp1, comp2, comp4        |
## |heterophilic cell-cell adhesion via plasma membrane cell adhesion molecules |OG_01489 |            4|comp1, comp2, comp4, comp5 |
## |heterophilic cell-cell adhesion via plasma membrane cell adhesion molecules |OG_01941 |            3|comp1, comp2, comp4        |
## |homophilic cell adhesion via plasma membrane adhesion molecules             |OG_01489 |            4|comp1, comp2, comp4, comp5 |
## |homophilic cell adhesion via plasma membrane adhesion molecules             |OG_01941 |            3|comp1, comp2, comp4        |
## |intracellular signal transduction                                           |OG_04195 |            3|comp2, comp4, comp5        |
## |intracellular signal transduction                                           |OG_09727 |            2|comp3, comp5               |
## |kidney development                                                          |OG_01489 |            4|comp1, comp2, comp4, comp5 |
## |kidney development                                                          |OG_02599 |            4|comp1, comp2, comp4, comp5 |
## |kidney development                                                          |OG_04195 |            3|comp2, comp4, comp5        |
## |negative regulation of apoptotic process                                    |OG_02599 |            4|comp1, comp2, comp4, comp5 |
## |negative regulation of apoptotic process                                    |OG_04195 |            3|comp2, comp4, comp5        |
## |negative regulation of apoptotic process                                    |OG_05776 |            3|comp2, comp4, comp5        |
## |negative regulation of cell population proliferation                        |OG_02599 |            4|comp1, comp2, comp4, comp5 |
## |negative regulation of cell population proliferation                        |OG_09650 |            2|comp1, comp4               |
## |negative regulation of transcription by RNA polymerase II                   |OG_02599 |            4|comp1, comp2, comp4, comp5 |
## |negative regulation of transcription by RNA polymerase II                   |OG_05110 |            4|comp1, comp2, comp4, comp5 |
## |negative regulation of transcription by RNA polymerase II                   |OG_05395 |            4|comp1, comp2, comp4, comp5 |
## |negative regulation of transcription by RNA polymerase II                   |OG_07762 |            4|comp1, comp2, comp4, comp5 |
## |negative regulation of transcription by RNA polymerase II                   |OG_05348 |            3|comp1, comp2, comp4        |
## |negative regulation of transcription by RNA polymerase II                   |OG_01366 |            2|comp4, comp5               |
## |negative regulation of transcription by RNA polymerase II                   |OG_09650 |            2|comp1, comp4               |
## |nervous system development                                                  |OG_02599 |            4|comp1, comp2, comp4, comp5 |
## |nervous system development                                                  |OG_05400 |            4|comp1, comp2, comp4, comp5 |
## |nervous system development                                                  |OG_09806 |            3|comp1, comp3, comp5        |
## |nervous system development                                                  |OG_03926 |            2|comp4, comp5               |
## |neuron projection morphogenesis                                             |OG_04335 |            4|comp1, comp2, comp4, comp5 |

Summary Statistics

# Dataset-wide redundancy summary, one labelled metric per row.
# Built inside local() so the helper values do not leak into the workspace.
overall_stats <- local({
  n_occurrences <- nrow(go_expanded)
  n_redundant <- sum(go_term_analysis$redundant_occurrences)
  metrics <- c(
    "Total GO term occurrences" = n_occurrences,
    "Unique GO terms" = n_distinct(go_expanded$go_term),
    "Unique group_ids with GO terms" = n_distinct(go_expanded$group_id),
    "Redundant occurrences (same group_id in multiple components)" = n_redundant,
    "Overall redundancy percentage" = round(n_redundant / n_occurrences * 100, 2)
  )
  data.frame(Metric = names(metrics), Value = unname(metrics))
})

cat("\n### Overall Statistics\n\n")
## 
## ### Overall Statistics
print(kable(overall_stats))
## 
## 
## |Metric                                                       |   Value|
## |:------------------------------------------------------------|-------:|
## |Total GO term occurrences                                    | 2290.00|
## |Unique GO terms                                              |  833.00|
## |Unique group_ids with GO terms                               |   94.00|
## |Redundant occurrences (same group_id in multiple components) | 1154.00|
## |Overall redundancy percentage                                |   50.39|
# The same totals restricted to the 30 most frequent GO terms, plus the mean
# of the per-term redundancy percentages
top_30_stats <- go_term_analysis %>%
  head(30) %>%
  summarise(
    total_occurrences = sum(total_occurrences),
    redundant_occurrences = sum(redundant_occurrences),
    avg_redundancy_pct = mean(redundancy_pct)
  )

cat("\n### Top 30 Terms Statistics\n\n")
## 
## ### Top 30 Terms Statistics
cat(sprintf("Total occurrences: %d\n", top_30_stats[["total_occurrences"]]))
## Total occurrences: 372
cat(sprintf("Redundant occurrences: %d\n", top_30_stats[["redundant_occurrences"]]))
## Redundant occurrences: 210
# Pooled percentage: redundant occurrences over all occurrences in the top 30
cat(sprintf(
  "Redundancy percentage: %.2f%%\n",
  (top_30_stats[["redundant_occurrences"]] / top_30_stats[["total_occurrences"]] * 100)
))
## Redundancy percentage: 56.45%
cat(sprintf("Average redundancy per term: %.2f%%\n", top_30_stats[["avg_redundancy_pct"]]))
## Average redundancy per term: 57.63%

Visualization: Redundancy Analysis

# Long-format counts for the 30 most frequent terms: each term contributes one
# "unique" row and one "redundant" row so the two can be stacked in one bar
plot_data <- go_term_analysis %>%
  head(30) %>%
  # Reverse the level order so the most frequent term ends up at the top
  # once the axes are flipped
  mutate(go_term = fct_rev(fct_inorder(go_term))) %>%
  pivot_longer(
    cols = c(unique_group_ids, redundant_occurrences),
    names_to = "type",
    values_to = "count"
  ) %>%
  mutate(
    # Replace the column names with the labels shown in the legend
    type = if_else(
      type == "unique_group_ids",
      "Unique group_ids",
      "Redundant (same group_id)"
    )
  )

# Horizontal stacked bars: unique group_ids plus redundant occurrences per term
p1 <- ggplot(data = plot_data, mapping = aes(x = go_term, y = count, fill = type)) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_fill_manual(
    name = "Occurrence Type",
    values = c(
      "Unique group_ids" = "steelblue",
      "Redundant (same group_id)" = "orange"
    )
  ) +
  labs(
    title = "Redundancy Analysis: Top 30 GO BP Terms (Rank 05)",
    subtitle = "Blue = unique group_ids, Orange = redundant occurrences (same group_id in multiple components)",
    x = "GO Biological Process Term",
    y = "Count"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray40"),
    axis.text.y = element_text(size = 10),
    legend.position = "bottom"
  )

print(p1)

# Write the figure to the output directory
ggsave(
  plot = p1,
  filename = file.path(output_dir, "rank05_GO_redundancy_analysis.png"),
  width = 14,
  height = 10,
  dpi = 300
)

Redundancy Percentage Plot

# Horizontal bars of the per-term redundancy percentage for the 30 most
# frequent terms, each bar labelled with its value
p2 <- go_term_analysis %>%
  head(30) %>%
  # Reverse the level order so the most frequent term ends up at the top
  # once the axes are flipped
  mutate(go_term = fct_rev(fct_inorder(go_term))) %>%
  ggplot(mapping = aes(x = go_term, y = redundancy_pct)) +
  geom_col(fill = "coral", alpha = 0.8) +
  geom_text(
    mapping = aes(label = sprintf("%.1f%%", redundancy_pct)),
    hjust = -0.1,
    size = 3
  ) +
  coord_flip() +
  labs(
    title = "Redundancy Percentage: Top 30 GO BP Terms (Rank 05)",
    subtitle = "Percentage of occurrences due to same group_id appearing in multiple components",
    x = "GO Biological Process Term",
    y = "Redundancy Percentage (%)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray40"),
    axis.text.y = element_text(size = 10),
    panel.grid.major.y = element_blank()
  )

print(p2)

# Write the figure to the output directory
ggsave(
  plot = p2,
  filename = file.path(output_dir, "rank05_GO_redundancy_percentage.png"),
  width = 14,
  height = 10,
  dpi = 300
)

Group IDs Appearing in Most Components

# Per group_id: the number of components it occurs in, the number of distinct
# GO terms it carries, and up to three of those terms as a sample.
# Only the 50 group_ids found in the most components are kept.
group_id_frequency <- go_expanded %>%
  group_by(group_id) %>%
  summarise(
    n_components = n_distinct(component),
    n_go_terms = n_distinct(go_term),
    go_terms_sample = paste(head(unique(go_term), 3), collapse = "; ")
  ) %>%
  arrange(desc(n_components)) %>%
  slice_head(n = 50)

cat("\n### Top 50 Group IDs by Component Frequency\n\n")
## 
## ### Top 50 Group IDs by Component Frequency
# The table already holds at most 50 rows, so it is printed as is
group_id_frequency %>%
  kable() %>%
  print()
## 
## 
## |group_id | n_components| n_go_terms|go_terms_sample                                                                                                                                                    |
## |:--------|------------:|----------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------|
## |OG_04439 |            5|          1|cilium movement involved in cell motility                                                                                                                          |
## |OG_00887 |            4|          6|cellular response to leukemia inhibitory factor; glycine catabolic process; glycine decarboxylation via glycine cleavage system                                    |
## |OG_01210 |            4|         12|blood circulation; defense response to bacterium; immune system process                                                                                            |
## |OG_01475 |            4|          5|chloride transmembrane transport; chloride transport; oxalate transport                                                                                            |
## |OG_01489 |            4|         18|branching involved in ureteric bud morphogenesis; cerebral cortex development; condensed mesenchymal cell proliferation                                            |
## |OG_02599 |            4|         64|anterior neuropore closure; basement membrane organization; bone morphogenesis                                                                                     |
## |OG_03061 |            4|          3|pore complex assembly; protein localization to plasma membrane; protein maturation                                                                                 |
## |OG_04129 |            4|          3|base-excision repair; DNA repair; response to oxidative stress                                                                                                     |
## |OG_04170 |            4|          4|defense response to bacterium; hydrogen peroxide catabolic process; immune response                                                                                |
## |OG_04335 |            4|          5|axon guidance; modulation of chemical synaptic transmission; negative regulation of fat cell differentiation                                                       |
## |OG_04737 |            4|         13|blood vessel maturation; canonical Wnt signaling pathway; dorsal root ganglion development                                                                         |
## |OG_04999 |            4|          8|camera-type eye development; circadian regulation of gene expression; embryonic retina morphogenesis in camera-type eye                                            |
## |OG_05110 |            4|         11|atrial septum primum morphogenesis; atrial septum secundum morphogenesis; bone development                                                                         |
## |OG_05208 |            4|          9|cell population proliferation; cholangiocyte proliferation; chromosome segregation                                                                                 |
## |OG_05395 |            4|         15|angiogenesis; cellular response to leukemia inhibitory factor; cellular response to peptide                                                                        |
## |OG_05400 |            4|          9|bone resorption; epithelial cell differentiation; lysosomal protein catabolic process                                                                              |
## |OG_06522 |            4|          7|astrocyte development; cell population proliferation; cell surface receptor protein tyrosine kinase signaling pathway                                              |
## |OG_07103 |            4|          1|unsaturated fatty acid biosynthetic process                                                                                                                        |
## |OG_07519 |            4|          9|actin filament organization; auditory receptor cell stereocilium organization; cilium assembly                                                                     |
## |OG_07631 |            4|          7|fatty acid elongation, monounsaturated fatty acid; fatty acid elongation, polyunsaturated fatty acid; fatty acid elongation, saturated fatty acid                  |
## |OG_07762 |            4|         27|animal organ regeneration; antral ovarian follicle growth; cell division                                                                                           |
## |OG_07861 |            4|          3|cell development; neuron differentiation; regulation of transcription by RNA polymerase II                                                                         |
## |OG_08226 |            4|          4|actomyosin contractile ring assembly; mitotic cytokinesis; positive regulation of bleb assembly                                                                    |
## |OG_01941 |            3|          7|actin cytoskeleton organization; basement membrane organization; cell division                                                                                     |
## |OG_02550 |            3|          1|deoxyribonucleotide biosynthetic process                                                                                                                           |
## |OG_04075 |            3|          1|blastocyst hatching                                                                                                                                                |
## |OG_04195 |            3|         67|activin receptor signaling pathway; angiogenesis; angiogenesis involved in coronary vascular morphogenesis                                                         |
## |OG_05348 |            3|         26|adenylate cyclase-inhibiting G protein-coupled receptor signaling pathway; aorta development; cAMP-mediated signaling                                              |
## |OG_05445 |            3|          5|cellular response to nitric oxide; cGMP biosynthetic process; cGMP-mediated signaling                                                                              |
## |OG_05776 |            3|          6|cell division; central nervous system development; mitotic nuclear membrane reassembly                                                                             |
## |OG_09806 |            3|         28|artery morphogenesis; cellular response to lipoprotein particle stimulus; cholesterol efflux                                                                       |
## |OG_10087 |            3|          6|branching involved in ureteric bud morphogenesis; extracellular matrix organization; male gamete generation                                                        |
## |OG_01366 |            2|          4|cellular response to leukemia inhibitory factor; negative regulation of canonical Wnt signaling pathway; negative regulation of transcription by RNA polymerase II |
## |OG_03332 |            2|         14|autophagosome maturation; cellular response to glucose starvation; early endosome to late endosome transport                                                       |
## |OG_03779 |            2|         15|cell migration; establishment or maintenance of cell polarity; Golgi to plasma membrane protein transport                                                          |
## |OG_03926 |            2|          3|cell division; nervous system development; regulation of cell cycle                                                                                                |
## |OG_04041 |            2|         16|B cell differentiation; definitive hemopoiesis; embryonic camera-type eye morphogenesis                                                                            |
## |OG_04538 |            2|          2|antibacterial innate immune response; defense response to Gram-negative bacterium                                                                                  |
## |OG_05765 |            2|          5|DNA replication; DNA replication initiation; double-strand break repair via break-induced replication                                                              |
## |OG_07349 |            2|          7|cell-matrix adhesion; cortical microtubule organization; negative regulation of protein localization to plasma membrane                                            |
## |OG_07351 |            2|          8|adenylate cyclase-activating G protein-coupled receptor signaling pathway; cellular response to luteinizing hormone stimulus; hormone-mediated signaling pathway   |
## |OG_07441 |            2|         15|adult walking behavior; brain development; cell differentiation                                                                                                    |
## |OG_08089 |            2|         14|cilium assembly; cloaca development; cytoskeleton organization                                                                                                     |
## |OG_08842 |            2|          1|sensory perception of sound                                                                                                                                        |
## |OG_09650 |            2|         19|anterior/posterior pattern specification; associative learning; cellular response to phorbol 13-acetate 12-myristate                                               |
## |OG_09727 |            2|          4|intracellular signal transduction; negative regulation of Ras protein signal transduction; platelet activation                                                     |
## |OG_00145 |            1|          8|embryonic organ development; hippo signaling; positive regulation of cell growth                                                                                   |
## |OG_00281 |            1|         12|cell cycle comprising mitosis without cytokinesis; chorionic trophoblast cell differentiation; fibroblast proliferation                                            |
## |OG_00403 |            1|          8|cerebellum development; glutamate biosynthetic process; glutamate catabolic process                                                                                |
## |OG_00615 |            1|          2|proteolysis; regulation of inflammatory response                                                                                                                   |
# Horizontal bar chart: one bar per group ID for the first 30 rows of
# group_id_frequency, with the component count printed beside each bar.
# NOTE(review): the "Top 30" title presumes group_id_frequency is already
# sorted by n_components (descending) -- confirm where it is built.
p3 <- ggplot(
  data = group_id_frequency %>%
    head(n = 30) %>%
    # Reverse the level order so the first row ends up at the top of the
    # chart once the axes are flipped.
    mutate(group_id = factor(group_id, levels = rev(group_id))),
  mapping = aes(x = group_id, y = n_components)
) +
  # geom_col() draws bar heights straight from the y values
  geom_col(fill = "darkgreen", alpha = 0.7) +
  # Negative hjust pushes the count label just past the end of its bar
  geom_text(aes(label = n_components), size = 3, hjust = -0.2) +
  coord_flip() +
  labs(
    x = "Group ID",
    y = "Number of Components",
    title = "Top 30 Group IDs by Number of Components (Rank 05)",
    subtitle = "Group IDs that appear in the most components"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 9),
    plot.title = element_text(size = 14, face = "bold")
  )

# Render the figure in the report
print(p3)

# Save the same figure as a 12 x 8 in PNG at 300 dpi
ggsave(
  file.path(output_dir, "rank05_most_frequent_group_ids.png"),
  plot = p3,
  width = 12,
  height = 8,
  dpi = 300
)

Export Detailed Results

# Write the three result tables to CSV files inside output_dir.

# 1. Full GO-term redundancy analysis
go_term_analysis %>%
  write_csv(file.path(output_dir, "rank05_GO_redundancy_full_analysis.csv"))

# 2. Details for group IDs that appear in more than one component
redundancy_details %>%
  write_csv(file.path(output_dir, "rank05_redundant_group_ids_details.csv"))

# 3. Per-group-ID component frequency
group_id_frequency %>%
  write_csv(file.path(output_dir, "rank05_group_id_component_frequency.csv"))

# Confirmation message for the rendered report
cat("\nExported detailed results to CSV files\n")
## 
## Exported detailed results to CSV files

Session Info

# Print the R version, platform, locale, and attached/loaded package versions
# used for this run, so the analysis environment is recorded with the output.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.7.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] C
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.48         RColorBrewer_1.1-3 lubridate_1.9.3    forcats_1.0.0     
##  [5] stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2        readr_2.1.5       
##  [9] tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0   
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9        utf8_1.2.4        generics_0.1.3    stringi_1.8.4    
##  [5] hms_1.1.3         digest_0.6.37     magrittr_2.0.3    evaluate_1.0.0   
##  [9] grid_4.3.2        timechange_0.3.0  fastmap_1.2.0     jsonlite_1.8.9   
## [13] fansi_1.0.6       scales_1.3.0      textshaping_0.4.0 jquerylib_0.1.4  
## [17] cli_3.6.3         rlang_1.1.4       crayon_1.5.3      bit64_4.5.2      
## [21] munsell_0.5.1     withr_3.0.1       cachem_1.1.0      yaml_2.3.10      
## [25] tools_4.3.2       parallel_4.3.2    tzdb_0.4.0        colorspace_2.1-1 
## [29] vctrs_0.6.5       R6_2.5.1          lifecycle_1.0.4   bit_4.5.0        
## [33] vroom_1.6.5       ragg_1.3.3        pkgconfig_2.0.3   pillar_1.9.0     
## [37] bslib_0.8.0       gtable_0.3.6      glue_1.8.0        systemfonts_1.1.0
## [41] highr_0.11        xfun_0.48         tidyselect_1.2.1  farver_2.1.2     
## [45] htmltools_0.5.8.1 rmarkdown_2.28    labeling_0.4.3    compiler_4.3.2