Overview

This analysis visualizes the most frequent Biological Process (BP) Gene Ontology terms annotated to the top 100 genes in each of the 35 components of the rank-35 tensor decomposition.

Load Libraries

library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(scales)

Define Paths

# Input directory: the per-component annotation CSVs are listed from here
# NOTE(review): absolute, user-specific path -- the script only runs
# unmodified on a machine with this exact directory layout
input_dir <- "/Users/sr320/Documents/GitHub/timeseries_molecular/M-multi-species/output/22-Visualizing-Rank-outs"

# Output directory: plots and the summary table are written here
# NOTE(review): same portability caveat as input_dir
output_dir <- "/Users/sr320/Documents/GitHub/timeseries_molecular/M-multi-species/output/23-visualizing-rank35"

# Create output directory if it doesn't exist
# (recursive = TRUE also creates missing parent directories;
# showWarnings = FALSE keeps re-runs quiet when the directory already exists)
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

Get List of Files

# Collect the rank 35 top-100 annotation CSVs (one per component)
files <- list.files(
  path = input_dir,
  pattern = "rank_35_comp.*_top100_annotation\\.csv$",
  full.names = TRUE
)

# Build a lookup table of path / component label / component number and
# order it numerically, so that e.g. comp2 is processed before comp10
file_df <- data.frame(path = files) %>%
  mutate(
    component = str_extract(basename(path), "comp\\d+"),
    comp_num = as.numeric(str_extract(component, "\\d+"))
  ) %>%
  arrange(comp_num)

cat(sprintf("Found %d files to process\n", nrow(file_df)))
## Found 35 files to process

Functions to Parse and Count GO BP Terms

# Parse one GO BP annotation string into a vector of term names.
#
# The input is expected to look like
#   "term one [GO:0000001]; term two [GO:0000002]"
# i.e. semicolon-separated entries, each a term name followed by a
# bracketed GO identifier.
#
# Args:
#   go_bp_string: a single character value (may be NA or "").
#
# Returns:
#   NULL when the input is NA or the empty string; otherwise a character
#   vector of term names with the "[GO:...]" suffix removed. Entries that
#   are empty after cleaning (e.g. produced by a trailing or doubled ";")
#   are dropped so they are not later counted as a term named "".
parse_go_bp <- function(go_bp_string) {
  if (is.na(go_bp_string) || go_bp_string == "") {
    return(NULL)
  }

  # Split on the literal ";" separator and trim surrounding whitespace
  terms <- trimws(strsplit(go_bp_string, ";", fixed = TRUE)[[1]])

  # Keep the term name only (everything before " [GO:...]")
  term_names <- sub("\\s*\\[GO:.*\\]", "", terms, perl = TRUE)

  # Discard blank entries left behind by stray separators
  term_names[nzchar(term_names)]
}

# Count GO BP term occurrences in one annotation file.
#
# Args:
#   file_path:      path to a CSV that contains a `go_bp` column.
#   component_name: label stored in the `component` column of the result.
#   top_n:          maximum number of terms to keep (default 20, the value
#                   that was previously hard-coded).
#
# Returns:
#   NULL when the file yields no GO BP terms; otherwise a data frame with
#   columns `term` (factor), `count` and `component`, sorted by descending
#   count and truncated to at most `top_n` rows.
#
# Note: terms with equal counts keep the order produced by table(), so the
# cut at `top_n` among tied terms follows that ordering rather than any
# biological criterion.
process_file <- function(file_path, component_name, top_n = 20) {
  # Read the file
  data <- read_csv(file_path, show_col_types = FALSE)

  # Fail with a clear message instead of an "object not found" error
  # raised from inside filter()
  if (!"go_bp" %in% names(data)) {
    stop(
      sprintf("Column 'go_bp' not found in %s", file_path),
      call. = FALSE
    )
  }

  # Extract all GO BP terms, one vector element per term occurrence
  all_terms <- data %>%
    filter(!is.na(go_bp) & go_bp != "") %>%
    pull(go_bp) %>%
    map(parse_go_bp) %>%
    unlist()

  if (length(all_terms) == 0) {
    return(NULL)
  }

  # Count term frequencies; `term` stays a factor, as before
  term_counts <- as.data.frame(table(all_terms)) %>%
    rename(term = all_terms, count = Freq) %>%
    arrange(desc(count))

  # Keep the top_n most frequent terms (min() keeps head() well-defined
  # when top_n exceeds the number of distinct terms)
  term_counts <- head(term_counts, min(top_n, nrow(term_counts)))

  term_counts$component <- component_name

  term_counts
}

Process All Files and Generate Visualizations

# Per-component results, keyed by component label (e.g. "comp1"); consumed
# by the summary and heatmap sections
all_results <- list()

# seq_len() rather than 1:nrow(): when no files were found, 1:0 would
# iterate over c(1, 0) and try to read a missing path
for (i in seq_len(nrow(file_df))) {
  file_path <- file_df$path[i]
  component <- file_df$component[i]
  comp_num <- file_df$comp_num[i]

  # Section header for the rendered report
  cat(sprintf("\n\n## Component %d\n\n", comp_num))

  # Count the GO BP terms for this component
  term_counts <- process_file(file_path, component)

  if (is.null(term_counts) || nrow(term_counts) == 0) {
    cat("No GO BP terms found for this component.\n\n")
    next
  }

  all_results[[component]] <- term_counts

  # Horizontal bar chart, most frequent term at the top
  p <- ggplot(term_counts, aes(x = reorder(term, count), y = count)) +
    geom_col(fill = "steelblue", alpha = 0.8) +
    coord_flip() +
    labs(
      title = sprintf("Top 20 Biological Process GO Terms - %s", component),
      subtitle = "From top 100 genes in rank 35 decomposition",
      x = "GO Biological Process Term",
      y = "Frequency"
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(size = 16, face = "bold"),
      plot.subtitle = element_text(size = 12, color = "gray40"),
      axis.text.y = element_text(size = 10),
      axis.text.x = element_text(size = 10),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank()
    )

  print(p)

  # Save individual plot
  ggsave(
    filename = file.path(output_dir, sprintf("rank35_%s_BP_GO_top20.png", component)),
    plot = p,
    width = 14,
    height = 10,
    dpi = 300
  )

  # Print top 10 terms as table
  cat("\n### Top 10 Terms\n\n")
  print(knitr::kable(head(term_counts, 10), row.names = FALSE))
  cat("\n\n")
}

Component 1

### Top 10 Terms

term count component
defense response to Gram-negative bacterium 3 comp1
innate immune response 3 comp1
positive regulation of gene expression 3 comp1
positive regulation of transcription by RNA polymerase II 3 comp1
proteolysis 3 comp1
response to bacterium 3 comp1
signal transduction 3 comp1
actin cytoskeleton organization 2 comp1
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp1
blastocyst development 2 comp1

Component 2

### Top 10 Terms

term count component
innate immune response 4 comp2
proteolysis 4 comp2
cell adhesion 3 comp2
cell division 3 comp2
actin cytoskeleton organization 2 comp2
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp2
basement membrane organization 2 comp2
cell surface receptor signaling pathway 2 comp2
collagen fibril organization 2 comp2
defense response to Gram-negative bacterium 2 comp2

Component 3

### Top 10 Terms

term count component
proteolysis 4 comp3
cell differentiation 3 comp3
innate immune response 3 comp3
positive regulation of gene expression 3 comp3
actin cytoskeleton organization 2 comp3
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp3
defense response to Gram-negative bacterium 2 comp3
gene expression 2 comp3
heart trabecula formation 2 comp3
inflammatory response 2 comp3

Component 4

### Top 10 Terms

term count component
defense response to Gram-negative bacterium 4 comp4
innate immune response 4 comp4
positive regulation of gene expression 4 comp4
positive regulation of transcription by RNA polymerase II 4 comp4
cell adhesion 3 comp4
cell division 3 comp4
mRNA processing 3 comp4
negative regulation of apoptotic process 3 comp4
negative regulation of transcription by RNA polymerase II 3 comp4
proteolysis 3 comp4

Component 5

### Top 10 Terms

term count component
defense response to Gram-negative bacterium 3 comp5
antibacterial innate immune response 2 comp5
epithelial cell proliferation 2 comp5
flagellated sperm motility 2 comp5
positive regulation of acrosome reaction 2 comp5
positive regulation of epithelial cell proliferation 2 comp5
positive regulation of gene expression 2 comp5
proteolysis 2 comp5
regulation of transcription by RNA polymerase II 2 comp5
triglyceride catabolic process 2 comp5

Component 6

### Top 10 Terms

term count component
gene expression 3 comp6
innate immune response 3 comp6
negative regulation of apoptotic process 3 comp6
positive regulation of gene expression 3 comp6
positive regulation of phosphatidylinositol 3-kinase/protein kinase B signal transduction 3 comp6
signal transduction 3 comp6
actin cytoskeleton organization 2 comp6
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp6
basement membrane organization 2 comp6
cell differentiation 2 comp6

Component 7

### Top 10 Terms

term count component
innate immune response 4 comp7
positive regulation of transcription by RNA polymerase II 4 comp7
defense response to Gram-negative bacterium 3 comp7
gene expression 3 comp7
negative regulation of transcription by RNA polymerase II 3 comp7
positive regulation of gene expression 3 comp7
proteolysis 3 comp7
response to bacterium 3 comp7
response to lipopolysaccharide 3 comp7
signal transduction 3 comp7

Component 8

### Top 10 Terms

term count component
positive regulation of transcription by RNA polymerase II 4 comp8
defense response to Gram-negative bacterium 3 comp8
gene expression 3 comp8
innate immune response 3 comp8
positive regulation of gene expression 3 comp8
response to bacterium 3 comp8
signal transduction 3 comp8
actin cytoskeleton organization 2 comp8
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp8
basement membrane organization 2 comp8

Component 9

### Top 10 Terms

term count component
regulation of transcription by RNA polymerase II 7 comp9
positive regulation of transcription by RNA polymerase II 6 comp9
negative regulation of transcription by RNA polymerase II 4 comp9
positive regulation of gene expression 4 comp9
cilium assembly 3 comp9
negative regulation of apoptotic process 3 comp9
positive regulation of DNA-templated transcription 3 comp9
MAPK cascade 2 comp9
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp9
basement membrane organization 2 comp9

Component 10

### Top 10 Terms

term count component
flagellated sperm motility 3 comp10
RNA splicing 2 comp10
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp10
axonemal central apparatus assembly 2 comp10
cell surface receptor signaling pathway 2 comp10
cilium assembly 2 comp10
cilium movement involved in cell motility 2 comp10
epithelial cilium movement involved in extracellular fluid movement 2 comp10
innate immune response 2 comp10
mRNA processing 2 comp10

Component 11

### Top 10 Terms

term count component
cell adhesion 3 comp11
cell division 3 comp11
proteolysis 3 comp11
response to lipopolysaccharide 3 comp11
signal transduction 3 comp11
actin cytoskeleton organization 2 comp11
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp11
basement membrane organization 2 comp11
cell surface receptor signaling pathway 2 comp11
cilium assembly 2 comp11

Component 12

### Top 10 Terms

term count component
negative regulation of transcription by RNA polymerase II 4 comp12
Notch signaling pathway 2 comp12
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp12
defense response to Gram-negative bacterium 2 comp12
epithelial cell proliferation 2 comp12
flagellated sperm motility 2 comp12
heart trabecula formation 2 comp12
liver development 2 comp12
muscle organ development 2 comp12
negative regulation of apoptotic process 2 comp12

Component 13

### Top 10 Terms

term count component
innate immune response 5 comp13
defense response to Gram-negative bacterium 3 comp13
gene expression 3 comp13
positive regulation of gene expression 3 comp13
positive regulation of transcription by RNA polymerase II 3 comp13
response to bacterium 3 comp13
signal transduction 3 comp13
DNA repair 2 comp13
actin cytoskeleton organization 2 comp13
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp13

Component 14

### Top 10 Terms

term count component
positive regulation of transcription by RNA polymerase II 4 comp14
proteolysis 4 comp14
defense response to Gram-negative bacterium 3 comp14
in utero embryonic development 3 comp14
innate immune response 3 comp14
positive regulation of gene expression 3 comp14
signal transduction 3 comp14
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp14
cell surface receptor signaling pathway 2 comp14
gene expression 2 comp14

Component 15

### Top 10 Terms

term count component
cell adhesion 3 comp15
cell division 3 comp15
innate immune response 3 comp15
negative regulation of apoptotic process 3 comp15
actin cytoskeleton organization 2 comp15
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp15
basement membrane organization 2 comp15
cell fate commitment 2 comp15
cell surface receptor signaling pathway 2 comp15
cilium assembly 2 comp15

Component 16

### Top 10 Terms

term count component
innate immune response 4 comp16
positive regulation of gene expression 4 comp16
positive regulation of transcription by RNA polymerase II 4 comp16
adenylate cyclase-activating G protein-coupled receptor signaling pathway 3 comp16
basement membrane organization 3 comp16
cell adhesion 3 comp16
cell division 3 comp16
gene expression 3 comp16
negative regulation of apoptotic process 3 comp16
proteolysis 3 comp16

Component 17

### Top 10 Terms

term count component
proteolysis 4 comp17
visual perception 4 comp17
negative regulation of transcription by RNA polymerase II 3 comp17
regulation of transcription by RNA polymerase II 3 comp17
Notch signaling pathway 2 comp17
Wnt signaling pathway 2 comp17
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp17
basement membrane organization 2 comp17
cell adhesion 2 comp17
cell division 2 comp17

Component 18

### Top 10 Terms

term count component
flagellated sperm motility 3 comp18
positive regulation of transcription by RNA polymerase II 3 comp18
signal transduction 3 comp18
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp18
cell surface receptor signaling pathway 2 comp18
cellular response to heat 2 comp18
cilium assembly 2 comp18
gene expression 2 comp18
innate immune response 2 comp18
mRNA processing 2 comp18

Component 19

### Top 10 Terms

term count component
innate immune response 4 comp19
positive regulation of transcription by RNA polymerase II 3 comp19
proteolysis 3 comp19
Notch signaling pathway 2 comp19
axonogenesis 2 comp19
defense response to Gram-negative bacterium 2 comp19
defense response to bacterium 2 comp19
gene expression 2 comp19
negative regulation of DNA-templated transcription 2 comp19
ovarian follicle development 2 comp19

Component 20

### Top 10 Terms

term count component
cilium assembly 3 comp20
proteolysis 3 comp20
signal transduction 3 comp20
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp20
basement membrane organization 2 comp20
cell surface receptor signaling pathway 2 comp20
flagellated sperm motility 2 comp20
gene expression 2 comp20
innate immune response 2 comp20
negative regulation of apoptotic process 2 comp20

Component 21

### Top 10 Terms

term count component
gene expression 3 comp21
negative regulation of transcription by RNA polymerase II 3 comp21
positive regulation of gene expression 3 comp21
proteolysis 3 comp21
response to lipopolysaccharide 3 comp21
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp21
basement membrane organization 2 comp21
cell surface receptor signaling pathway 2 comp21
cilium assembly 2 comp21
defense response to Gram-negative bacterium 2 comp21

Component 22

### Top 10 Terms

term count component
cell adhesion 4 comp22
innate immune response 4 comp22
proteolysis 4 comp22
cell division 3 comp22
positive regulation of gene expression 3 comp22
positive regulation of transcription by RNA polymerase II 3 comp22
signal transduction 3 comp22
DNA repair 2 comp22
actin cytoskeleton organization 2 comp22
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp22

Component 23

### Top 10 Terms

term count component
signal transduction 4 comp23
adenylate cyclase-activating G protein-coupled receptor signaling pathway 3 comp23
proteolysis 3 comp23
G protein-coupled receptor signaling pathway 2 comp23
apoptotic process 2 comp23
cell surface receptor signaling pathway 2 comp23
defense response to Gram-negative bacterium 2 comp23
innate immune response 2 comp23
positive regulation of reactive oxygen species biosynthetic process 2 comp23
DNA double-strand break processing 1 comp23

Component 24

### Top 10 Terms

term count component
positive regulation of transcription by RNA polymerase II 5 comp24
defense response to Gram-negative bacterium 4 comp24
positive regulation of gene expression 3 comp24
proteolysis 3 comp24
regulation of transcription by RNA polymerase II 3 comp24
response to bacterium 3 comp24
DNA repair 2 comp24
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp24
blastocyst development 2 comp24
cell surface receptor signaling pathway 2 comp24

Component 25

### Top 10 Terms

term count component
defense response to Gram-negative bacterium 3 comp25
positive regulation of transcription by RNA polymerase II 3 comp25
actin cytoskeleton organization 2 comp25
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp25
cell surface receptor signaling pathway 2 comp25
flagellated sperm motility 2 comp25
mRNA processing 2 comp25
negative regulation of apoptotic process 2 comp25
negative regulation of transcription by RNA polymerase II 2 comp25
proteolysis 2 comp25

Component 26

### Top 10 Terms

term count component
positive regulation of transcription by RNA polymerase II 5 comp26
positive regulation of gene expression 4 comp26
defense response to Gram-negative bacterium 3 comp26
innate immune response 3 comp26
mRNA processing 3 comp26
proteolysis 3 comp26
regulation of transcription by RNA polymerase II 3 comp26
signal transduction 3 comp26
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp26
cell division 2 comp26

Component 27

### Top 10 Terms

term count component
proteolysis 4 comp27
defense response to Gram-negative bacterium 3 comp27
innate immune response 3 comp27
mRNA processing 3 comp27
positive regulation of gene expression 3 comp27
positive regulation of transcription by RNA polymerase II 3 comp27
signal transduction 3 comp27
DNA repair 2 comp27
RNA splicing 2 comp27
actin cytoskeleton organization 2 comp27

Component 28

### Top 10 Terms

term count component
positive regulation of transcription by RNA polymerase II 5 comp28
defense response to Gram-negative bacterium 3 comp28
gene expression 3 comp28
innate immune response 3 comp28
positive regulation of gene expression 3 comp28
signal transduction 3 comp28
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp28
basement membrane organization 2 comp28
cell adhesion 2 comp28
cell division 2 comp28

Component 29

### Top 10 Terms

term count component
proteolysis 4 comp29
innate immune response 3 comp29
positive regulation of gene expression 3 comp29
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp29
cell adhesion 2 comp29
cell surface receptor signaling pathway 2 comp29
defense response to Gram-negative bacterium 2 comp29
extracellular matrix organization 2 comp29
in utero embryonic development 2 comp29
positive regulation of interleukin-6 production 2 comp29

Component 30

### Top 10 Terms

term count component
adenylate cyclase-activating G protein-coupled receptor signaling pathway 3 comp30
cell surface receptor signaling pathway 3 comp30
negative regulation of apoptotic process 3 comp30
negative regulation of transcription by RNA polymerase II 3 comp30
positive regulation of gene expression 3 comp30
positive regulation of transcription by RNA polymerase II 3 comp30
regulation of transcription by RNA polymerase II 3 comp30
response to lipopolysaccharide 3 comp30
NADPH regeneration 2 comp30
basement membrane organization 2 comp30

Component 31

### Top 10 Terms

term count component
visual perception 4 comp31
defense response to Gram-negative bacterium 3 comp31
negative regulation of transcription by RNA polymerase II 3 comp31
positive regulation of transcription by RNA polymerase II 3 comp31
proteolysis 3 comp31
actin cytoskeleton organization 2 comp31
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp31
cell surface receptor signaling pathway 2 comp31
flagellated sperm motility 2 comp31
gene expression 2 comp31

Component 32

### Top 10 Terms

term count component
proteolysis 4 comp32
in utero embryonic development 3 comp32
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp32
cell surface receptor signaling pathway 2 comp32
flagellated sperm motility 2 comp32
protein homotetramerization 2 comp32
signal transduction 2 comp32
visual perception 2 comp32
CDP biosynthetic process 1 comp32
CMP catabolic process 1 comp32

Component 33

### Top 10 Terms

term count component
cilium assembly 4 comp33
Notch signaling pathway 2 comp33
adenylate cyclase-activating G protein-coupled receptor signaling pathway 2 comp33
cell differentiation 2 comp33
cell projection organization 2 comp33
cell surface receptor signaling pathway 2 comp33
defense response to Gram-negative bacterium 2 comp33
flagellated sperm motility 2 comp33
heart looping 2 comp33
negative regulation of transforming growth factor beta receptor signaling pathway 2 comp33

Component 34

### Top 10 Terms

term count component
heart development 4 comp34
negative regulation of apoptotic process 4 comp34
adenylate cyclase-activating G protein-coupled receptor signaling pathway 3 comp34
basement membrane organization 3 comp34
gene expression 3 comp34
negative regulation of transcription by RNA polymerase II 3 comp34
response to lipopolysaccharide 3 comp34
response to xenobiotic stimulus 3 comp34
Notch signaling pathway 2 comp34
axon guidance 2 comp34

Component 35

### Top 10 Terms

term count component
positive regulation of gene expression 4 comp35
positive regulation of transcription by RNA polymerase II 4 comp35
defense response to Gram-negative bacterium 3 comp35
gene expression 3 comp35
immune system process 3 comp35
innate immune response 3 comp35
proteolysis 3 comp35
regulation of transcription by RNA polymerase II 3 comp35
signal transduction 3 comp35
cell adhesion 2 comp35

Summary: Most Common GO BP Terms Across All Components

# Combine the per-component tables and summarise term usage across them.
# Each element of all_results holds only that component's top terms (top 20
# by default), so the totals below describe those top-term lists, not every
# annotated term; the plot labels say so explicitly.
if (length(all_results) > 0) {
  combined_data <- bind_rows(all_results)

  # Per term: summed count and number of components whose top-term list
  # contains it; keep the 30 terms with the highest summed count
  overall_counts <- combined_data %>%
    group_by(term) %>%
    summarise(
      total_count = sum(count),
      n_components = n()
    ) %>%
    arrange(desc(total_count)) %>%
    head(30)

  # Create overall summary plot
  p_summary <- ggplot(overall_counts, aes(x = reorder(term, total_count), y = total_count)) +
    geom_col(fill = "darkblue", alpha = 0.7) +
    geom_text(aes(label = n_components), hjust = -0.3, size = 3, color = "red") +
    coord_flip() +
    labs(
      title = "Top 30 Biological Process GO Terms Across All Rank 35 Components",
      subtitle = "Red numbers indicate how many components include each term in their top 20",
      x = "GO Biological Process Term",
      y = "Total Frequency Across Component Top-20 Lists"
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(size = 16, face = "bold"),
      plot.subtitle = element_text(size = 12, color = "gray40"),
      axis.text.y = element_text(size = 10),
      axis.text.x = element_text(size = 10),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank()
    )

  print(p_summary)

  # Save summary plot
  ggsave(
    filename = file.path(output_dir, "rank35_ALL_components_BP_GO_summary.png"),
    plot = p_summary,
    width = 14,
    height = 12,
    dpi = 300
  )

  # Save summary table
  write_csv(
    overall_counts,
    file.path(output_dir, "rank35_BP_GO_summary_table.csv")
  )

  cat("\n### Top 30 GO BP Terms Summary\n\n")
  print(knitr::kable(overall_counts, row.names = FALSE))
}

## 
## ### Top 30 GO BP Terms Summary
## 
## 
## 
## |term                                                                      | total_count| n_components|
## |:-------------------------------------------------------------------------|-----------:|------------:|
## |innate immune response                                                    |          80|           27|
## |proteolysis                                                               |          79|           25|
## |positive regulation of gene expression                                    |          78|           27|
## |defense response to Gram-negative bacterium                               |          76|           30|
## |positive regulation of transcription by RNA polymerase II                 |          76|           21|
## |adenylate cyclase-activating G protein-coupled receptor signaling pathway |          69|           33|
## |cell surface receptor signaling pathway                                   |          59|           29|
## |gene expression                                                           |          59|           25|
## |signal transduction                                                       |          59|           21|
## |negative regulation of transcription by RNA polymerase II                 |          47|           18|
## |negative regulation of apoptotic process                                  |          46|           19|
## |basement membrane organization                                            |          36|           17|
## |cilium assembly                                                           |          36|           16|
## |actin cytoskeleton organization                                           |          31|           16|
## |cell adhesion                                                             |          29|           11|
## |response to bacterium                                                     |          28|           11|
## |response to lipopolysaccharide                                            |          28|           11|
## |regulation of transcription by RNA polymerase II                          |          28|            9|
## |flagellated sperm motility                                                |          26|           12|
## |mRNA processing                                                           |          25|           11|
## |cell division                                                             |          24|            9|
## |positive regulation of reactive oxygen species biosynthetic process       |          24|           12|
## |visual perception                                                         |          22|            9|
## |negative regulation of tumor necrosis factor production                   |          20|           10|
## |in utero embryonic development                                            |          18|            8|
## |DNA repair                                                                |          15|           10|
## |Notch signaling pathway                                                   |          13|            8|
## |cell differentiation                                                      |          13|            6|
## |blastocyst development                                                    |          12|            6|
## |defense response to bacterium                                             |          12|            6|

Component Comparison Heatmap

# Heatmap of term counts (rows: top overall terms, columns: components).
# Relies on combined_data and overall_counts created by the summary section
# under the same length(all_results) > 0 condition.
if (length(all_results) > 0) {
  # Top 30 overall terms, as plain character so that comparisons and factor
  # levels below do not depend on the factor levels of overall_counts$term
  top_terms <- as.character(head(overall_counts$term, 30))

  # Wide table of term counts by component; a term missing from a
  # component's list is filled with 0
  heatmap_data <- combined_data %>%
    filter(term %in% top_terms) %>%
    select(component, term, count) %>%
    pivot_wider(names_from = component, values_from = count, values_fill = 0)

  # Convert to matrix format (terms as row names)
  heatmap_matrix <- heatmap_data %>%
    column_to_rownames("term") %>%
    as.matrix()

  # Reorder columns by component number; drop = FALSE keeps a matrix even
  # when only one component produced results
  comp_order <- str_sort(colnames(heatmap_matrix), numeric = TRUE)
  heatmap_matrix <- heatmap_matrix[, comp_order, drop = FALSE]

  # Convert back to long format for ggplot
  heatmap_long <- heatmap_matrix %>%
    as.data.frame() %>%
    rownames_to_column("term") %>%
    pivot_longer(-term, names_to = "component", values_to = "count")

  # Order terms by total frequency (reversed so the most frequent term is
  # drawn at the top of the y axis)
  term_order <- top_terms
  heatmap_long$term <- factor(heatmap_long$term, levels = rev(term_order))

  # Order components numerically along the x axis
  heatmap_long <- heatmap_long %>%
    mutate(comp_num = as.numeric(str_extract(component, "\\d+"))) %>%
    arrange(comp_num)
  heatmap_long$component <- factor(
    heatmap_long$component,
    levels = unique(heatmap_long$component[order(heatmap_long$comp_num)])
  )

  # Create heatmap; tile borders use linewidth (size is deprecated for
  # line widths since ggplot2 3.4.0)
  p_heatmap <- ggplot(heatmap_long, aes(x = component, y = term, fill = count)) +
    geom_tile(color = "white", linewidth = 0.5) +
    scale_fill_gradient2(
      low = "white", mid = "lightblue", high = "darkblue",
      midpoint = max(heatmap_long$count) / 2,
      name = "Count"
    ) +
    labs(
      title = "GO BP Terms Distribution Across Rank 35 Components",
      subtitle = "Top 30 most frequent terms",
      x = "Component",
      y = "GO Biological Process Term"
    ) +
    theme_minimal(base_size = 10) +
    theme(
      plot.title = element_text(size = 16, face = "bold"),
      plot.subtitle = element_text(size = 12, color = "gray40"),
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 8),
      axis.text.y = element_text(size = 9),
      panel.grid = element_blank(),
      legend.position = "right"
    )

  print(p_heatmap)

  # Save heatmap
  ggsave(
    filename = file.path(output_dir, "rank35_BP_GO_heatmap.png"),
    plot = p_heatmap,
    width = 16,
    height = 12,
    dpi = 300
  )
}

Session Info

# Print the R version, platform and attached/loaded package versions
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.7.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] C
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] scales_1.3.0       RColorBrewer_1.1-3 lubridate_1.9.3    forcats_1.0.0     
##  [5] stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2        readr_2.1.5       
##  [9] tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1      tidyverse_2.0.0   
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9        utf8_1.2.4        generics_0.1.3    stringi_1.8.4    
##  [5] hms_1.1.3         digest_0.6.37     magrittr_2.0.3    evaluate_1.0.0   
##  [9] grid_4.3.2        timechange_0.3.0  fastmap_1.2.0     jsonlite_1.8.9   
## [13] fansi_1.0.6       textshaping_0.4.0 jquerylib_0.1.4   cli_3.6.3        
## [17] rlang_1.1.4       crayon_1.5.3      bit64_4.5.2       munsell_0.5.1    
## [21] withr_3.0.1       cachem_1.1.0      yaml_2.3.10       tools_4.3.2      
## [25] parallel_4.3.2    tzdb_0.4.0        colorspace_2.1-1  vctrs_0.6.5      
## [29] R6_2.5.1          lifecycle_1.0.4   bit_4.5.0         vroom_1.6.5      
## [33] ragg_1.3.3        pkgconfig_2.0.3   pillar_1.9.0      bslib_0.8.0      
## [37] gtable_0.3.6      glue_1.8.0        systemfonts_1.1.0 highr_0.11       
## [41] xfun_0.48         tidyselect_1.2.1  knitr_1.48        farver_2.1.2     
## [45] htmltools_0.5.8.1 rmarkdown_2.28    labeling_0.4.3    compiler_4.3.2