This analysis visualizes the Biological Process (BP) Gene Ontology terms from the top 100 genes in each of the 35 components from the rank 35 tensor decomposition.
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(scales)
# Locations: annotation CSVs are read from input_dir, figures and tables are
# written to output_dir.
input_dir <- "/Users/sr320/Documents/GitHub/timeseries_molecular/M-multi-species/output/22-Visualizing-Rank-outs"
output_dir <- "/Users/sr320/Documents/GitHub/timeseries_molecular/M-multi-species/output/23-visualizing-rank35"

# Make sure the output location exists (no warning if it is already there)
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# Collect every rank_35 top100 annotation file (full paths)
files <- list.files(
  input_dir,
  pattern = "rank_35_comp.*_top100_annotation\\.csv$",
  full.names = TRUE
)

# One row per file: its path, its "comp<N>" label taken from the file name,
# and the numeric part of that label, which is used to sort the rows.
file_df <- data.frame(
  path = files,
  component = str_extract(basename(files), "comp\\d+")
)
file_df$comp_num <- as.numeric(str_extract(file_df$component, "\\d+"))
file_df <- arrange(file_df, comp_num)

cat(sprintf("Found %d files to process\n", nrow(file_df)))
## Found 35 files to process
# Parse one GO BP annotation string into a vector of term names.
#
# The input is a single string of semicolon-separated entries, each of the
# form "term name [GO:xxxxxxx]". The "[GO:...]" tag is stripped so only the
# term name remains.
#
# Args:
#   go_bp_string: a length-one character value (may be NA or "").
# Returns:
#   A character vector of term names, or NULL when the input is missing,
#   empty, or contains no usable term.
parse_go_bp <- function(go_bp_string) {
  # length() is checked first so a zero-length input returns NULL instead of
  # making the `||` condition fail.
  if (length(go_bp_string) == 0 || is.na(go_bp_string) || go_bp_string == "") {
    return(NULL)
  }

  # Split by semicolon and trim whitespace
  terms <- trimws(strsplit(go_bp_string, ";", fixed = TRUE)[[1]])

  # Extract term name (everything before [GO:...)
  term_names <- trimws(sub("\\s*\\[GO:.*\\]", "", terms, perl = TRUE))

  # Trailing or doubled semicolons (and entries that were only a GO tag)
  # leave empty strings behind; drop them so they are not counted as a term.
  term_names <- term_names[nzchar(term_names)]

  if (length(term_names) == 0) {
    return(NULL)
  }
  term_names
}
# Summarise the GO BP terms found in one annotation file.
#
# Reads the CSV at file_path, parses every non-missing, non-empty value of
# its `go_bp` column with parse_go_bp(), and tabulates how often each term
# name occurs.
#
# Args:
#   file_path:      path to the annotation CSV.
#   component_name: label stored in the `component` column of the result.
# Returns:
#   A data frame with columns term, count and component holding the 20 most
#   frequent terms (fewer if the file has fewer), or NULL when the file
#   yields no terms.
process_file <- function(file_path, component_name) {
  annotations <- read_csv(file_path, show_col_types = FALSE)

  # Keep only rows that actually carry a GO BP annotation
  annotated <- filter(annotations, !is.na(go_bp), go_bp != "")

  # One parse per row, flattened into a single vector of term names.
  # The variable name matters: table() uses it as the column name that is
  # renamed to `term` below.
  all_terms <- unlist(lapply(pull(annotated, go_bp), parse_go_bp))

  # Nothing to count
  if (length(all_terms) == 0) {
    return(NULL)
  }

  # Frequency table, most frequent first, truncated to the top 20
  term_counts <- as.data.frame(table(all_terms))
  term_counts <- rename(term_counts, term = all_terms, count = Freq)
  term_counts <- arrange(term_counts, desc(count))
  term_counts <- head(term_counts, 20)

  term_counts$component <- component_name
  term_counts
}
# Per-component results, keyed by component label (e.g. "comp1"); used by the
# summary sections further down.
all_results <- list()

# For every annotation file: count GO BP terms, draw and save a bar chart of
# the top terms, and print the top 10 as a markdown table.
# seq_len() is used instead of 1:nrow(): with zero matching files 1:0 would
# iterate over c(1, 0) and attempt to read a missing path.
for (i in seq_len(nrow(file_df))) {
  file_path <- file_df$path[i]
  component <- file_df$component[i]
  comp_num <- file_df$comp_num[i]

  cat(sprintf("\n\n## Component %d\n\n", comp_num))

  # Process the file
  term_counts <- process_file(file_path, component)

  # process_file() returns NULL when the file has no GO BP terms
  if (is.null(term_counts) || nrow(term_counts) == 0) {
    cat("No GO BP terms found for this component.\n\n")
    next
  }

  all_results[[component]] <- term_counts

  # Create visualization: horizontal bars, most frequent term at the top
  p <- ggplot(term_counts, aes(x = reorder(term, count), y = count)) +
    geom_col(fill = "steelblue", alpha = 0.8) +
    coord_flip() +
    labs(
      title = sprintf("Top 20 Biological Process GO Terms - %s", component),
      subtitle = "From top 100 genes in rank 35 decomposition",
      x = "GO Biological Process Term",
      y = "Frequency"
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(size = 16, face = "bold"),
      plot.subtitle = element_text(size = 12, color = "gray40"),
      axis.text.y = element_text(size = 10),
      axis.text.x = element_text(size = 10),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank()
    )

  print(p)

  # Save individual plot
  ggsave(
    filename = file.path(output_dir, sprintf("rank35_%s_BP_GO_top20.png", component)),
    plot = p,
    width = 14,
    height = 10,
    dpi = 300
  )

  # Print top 10 terms as table
  cat("\n### Top 10 Terms\n\n")
  print(knitr::kable(head(term_counts, 10), row.names = FALSE))
  cat("\n\n")
}
### Top 10 Terms
| term | count | component |
|---|---|---|
| defense response to Gram-negative bacterium | 3 | comp1 |
| innate immune response | 3 | comp1 |
| positive regulation of gene expression | 3 | comp1 |
| positive regulation of transcription by RNA polymerase II | 3 | comp1 |
| proteolysis | 3 | comp1 |
| response to bacterium | 3 | comp1 |
| signal transduction | 3 | comp1 |
| actin cytoskeleton organization | 2 | comp1 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp1 |
| blastocyst development | 2 | comp1 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| innate immune response | 4 | comp2 |
| proteolysis | 4 | comp2 |
| cell adhesion | 3 | comp2 |
| cell division | 3 | comp2 |
| actin cytoskeleton organization | 2 | comp2 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp2 |
| basement membrane organization | 2 | comp2 |
| cell surface receptor signaling pathway | 2 | comp2 |
| collagen fibril organization | 2 | comp2 |
| defense response to Gram-negative bacterium | 2 | comp2 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| proteolysis | 4 | comp3 |
| cell differentiation | 3 | comp3 |
| innate immune response | 3 | comp3 |
| positive regulation of gene expression | 3 | comp3 |
| actin cytoskeleton organization | 2 | comp3 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp3 |
| defense response to Gram-negative bacterium | 2 | comp3 |
| gene expression | 2 | comp3 |
| heart trabecula formation | 2 | comp3 |
| inflammatory response | 2 | comp3 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| defense response to Gram-negative bacterium | 4 | comp4 |
| innate immune response | 4 | comp4 |
| positive regulation of gene expression | 4 | comp4 |
| positive regulation of transcription by RNA polymerase II | 4 | comp4 |
| cell adhesion | 3 | comp4 |
| cell division | 3 | comp4 |
| mRNA processing | 3 | comp4 |
| negative regulation of apoptotic process | 3 | comp4 |
| negative regulation of transcription by RNA polymerase II | 3 | comp4 |
| proteolysis | 3 | comp4 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| defense response to Gram-negative bacterium | 3 | comp5 |
| antibacterial innate immune response | 2 | comp5 |
| epithelial cell proliferation | 2 | comp5 |
| flagellated sperm motility | 2 | comp5 |
| positive regulation of acrosome reaction | 2 | comp5 |
| positive regulation of epithelial cell proliferation | 2 | comp5 |
| positive regulation of gene expression | 2 | comp5 |
| proteolysis | 2 | comp5 |
| regulation of transcription by RNA polymerase II | 2 | comp5 |
| triglyceride catabolic process | 2 | comp5 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| gene expression | 3 | comp6 |
| innate immune response | 3 | comp6 |
| negative regulation of apoptotic process | 3 | comp6 |
| positive regulation of gene expression | 3 | comp6 |
| positive regulation of phosphatidylinositol 3-kinase/protein kinase B signal transduction | 3 | comp6 |
| signal transduction | 3 | comp6 |
| actin cytoskeleton organization | 2 | comp6 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp6 |
| basement membrane organization | 2 | comp6 |
| cell differentiation | 2 | comp6 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| innate immune response | 4 | comp7 |
| positive regulation of transcription by RNA polymerase II | 4 | comp7 |
| defense response to Gram-negative bacterium | 3 | comp7 |
| gene expression | 3 | comp7 |
| negative regulation of transcription by RNA polymerase II | 3 | comp7 |
| positive regulation of gene expression | 3 | comp7 |
| proteolysis | 3 | comp7 |
| response to bacterium | 3 | comp7 |
| response to lipopolysaccharide | 3 | comp7 |
| signal transduction | 3 | comp7 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| positive regulation of transcription by RNA polymerase II | 4 | comp8 |
| defense response to Gram-negative bacterium | 3 | comp8 |
| gene expression | 3 | comp8 |
| innate immune response | 3 | comp8 |
| positive regulation of gene expression | 3 | comp8 |
| response to bacterium | 3 | comp8 |
| signal transduction | 3 | comp8 |
| actin cytoskeleton organization | 2 | comp8 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp8 |
| basement membrane organization | 2 | comp8 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| regulation of transcription by RNA polymerase II | 7 | comp9 |
| positive regulation of transcription by RNA polymerase II | 6 | comp9 |
| negative regulation of transcription by RNA polymerase II | 4 | comp9 |
| positive regulation of gene expression | 4 | comp9 |
| cilium assembly | 3 | comp9 |
| negative regulation of apoptotic process | 3 | comp9 |
| positive regulation of DNA-templated transcription | 3 | comp9 |
| MAPK cascade | 2 | comp9 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp9 |
| basement membrane organization | 2 | comp9 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| flagellated sperm motility | 3 | comp10 |
| RNA splicing | 2 | comp10 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp10 |
| axonemal central apparatus assembly | 2 | comp10 |
| cell surface receptor signaling pathway | 2 | comp10 |
| cilium assembly | 2 | comp10 |
| cilium movement involved in cell motility | 2 | comp10 |
| epithelial cilium movement involved in extracellular fluid movement | 2 | comp10 |
| innate immune response | 2 | comp10 |
| mRNA processing | 2 | comp10 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| cell adhesion | 3 | comp11 |
| cell division | 3 | comp11 |
| proteolysis | 3 | comp11 |
| response to lipopolysaccharide | 3 | comp11 |
| signal transduction | 3 | comp11 |
| actin cytoskeleton organization | 2 | comp11 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp11 |
| basement membrane organization | 2 | comp11 |
| cell surface receptor signaling pathway | 2 | comp11 |
| cilium assembly | 2 | comp11 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| negative regulation of transcription by RNA polymerase II | 4 | comp12 |
| Notch signaling pathway | 2 | comp12 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp12 |
| defense response to Gram-negative bacterium | 2 | comp12 |
| epithelial cell proliferation | 2 | comp12 |
| flagellated sperm motility | 2 | comp12 |
| heart trabecula formation | 2 | comp12 |
| liver development | 2 | comp12 |
| muscle organ development | 2 | comp12 |
| negative regulation of apoptotic process | 2 | comp12 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| innate immune response | 5 | comp13 |
| defense response to Gram-negative bacterium | 3 | comp13 |
| gene expression | 3 | comp13 |
| positive regulation of gene expression | 3 | comp13 |
| positive regulation of transcription by RNA polymerase II | 3 | comp13 |
| response to bacterium | 3 | comp13 |
| signal transduction | 3 | comp13 |
| DNA repair | 2 | comp13 |
| actin cytoskeleton organization | 2 | comp13 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp13 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| positive regulation of transcription by RNA polymerase II | 4 | comp14 |
| proteolysis | 4 | comp14 |
| defense response to Gram-negative bacterium | 3 | comp14 |
| in utero embryonic development | 3 | comp14 |
| innate immune response | 3 | comp14 |
| positive regulation of gene expression | 3 | comp14 |
| signal transduction | 3 | comp14 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp14 |
| cell surface receptor signaling pathway | 2 | comp14 |
| gene expression | 2 | comp14 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| cell adhesion | 3 | comp15 |
| cell division | 3 | comp15 |
| innate immune response | 3 | comp15 |
| negative regulation of apoptotic process | 3 | comp15 |
| actin cytoskeleton organization | 2 | comp15 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp15 |
| basement membrane organization | 2 | comp15 |
| cell fate commitment | 2 | comp15 |
| cell surface receptor signaling pathway | 2 | comp15 |
| cilium assembly | 2 | comp15 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| innate immune response | 4 | comp16 |
| positive regulation of gene expression | 4 | comp16 |
| positive regulation of transcription by RNA polymerase II | 4 | comp16 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 3 | comp16 |
| basement membrane organization | 3 | comp16 |
| cell adhesion | 3 | comp16 |
| cell division | 3 | comp16 |
| gene expression | 3 | comp16 |
| negative regulation of apoptotic process | 3 | comp16 |
| proteolysis | 3 | comp16 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| proteolysis | 4 | comp17 |
| visual perception | 4 | comp17 |
| negative regulation of transcription by RNA polymerase II | 3 | comp17 |
| regulation of transcription by RNA polymerase II | 3 | comp17 |
| Notch signaling pathway | 2 | comp17 |
| Wnt signaling pathway | 2 | comp17 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp17 |
| basement membrane organization | 2 | comp17 |
| cell adhesion | 2 | comp17 |
| cell division | 2 | comp17 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| flagellated sperm motility | 3 | comp18 |
| positive regulation of transcription by RNA polymerase II | 3 | comp18 |
| signal transduction | 3 | comp18 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp18 |
| cell surface receptor signaling pathway | 2 | comp18 |
| cellular response to heat | 2 | comp18 |
| cilium assembly | 2 | comp18 |
| gene expression | 2 | comp18 |
| innate immune response | 2 | comp18 |
| mRNA processing | 2 | comp18 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| innate immune response | 4 | comp19 |
| positive regulation of transcription by RNA polymerase II | 3 | comp19 |
| proteolysis | 3 | comp19 |
| Notch signaling pathway | 2 | comp19 |
| axonogenesis | 2 | comp19 |
| defense response to Gram-negative bacterium | 2 | comp19 |
| defense response to bacterium | 2 | comp19 |
| gene expression | 2 | comp19 |
| negative regulation of DNA-templated transcription | 2 | comp19 |
| ovarian follicle development | 2 | comp19 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| cilium assembly | 3 | comp20 |
| proteolysis | 3 | comp20 |
| signal transduction | 3 | comp20 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp20 |
| basement membrane organization | 2 | comp20 |
| cell surface receptor signaling pathway | 2 | comp20 |
| flagellated sperm motility | 2 | comp20 |
| gene expression | 2 | comp20 |
| innate immune response | 2 | comp20 |
| negative regulation of apoptotic process | 2 | comp20 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| gene expression | 3 | comp21 |
| negative regulation of transcription by RNA polymerase II | 3 | comp21 |
| positive regulation of gene expression | 3 | comp21 |
| proteolysis | 3 | comp21 |
| response to lipopolysaccharide | 3 | comp21 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp21 |
| basement membrane organization | 2 | comp21 |
| cell surface receptor signaling pathway | 2 | comp21 |
| cilium assembly | 2 | comp21 |
| defense response to Gram-negative bacterium | 2 | comp21 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| cell adhesion | 4 | comp22 |
| innate immune response | 4 | comp22 |
| proteolysis | 4 | comp22 |
| cell division | 3 | comp22 |
| positive regulation of gene expression | 3 | comp22 |
| positive regulation of transcription by RNA polymerase II | 3 | comp22 |
| signal transduction | 3 | comp22 |
| DNA repair | 2 | comp22 |
| actin cytoskeleton organization | 2 | comp22 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp22 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| signal transduction | 4 | comp23 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 3 | comp23 |
| proteolysis | 3 | comp23 |
| G protein-coupled receptor signaling pathway | 2 | comp23 |
| apoptotic process | 2 | comp23 |
| cell surface receptor signaling pathway | 2 | comp23 |
| defense response to Gram-negative bacterium | 2 | comp23 |
| innate immune response | 2 | comp23 |
| positive regulation of reactive oxygen species biosynthetic process | 2 | comp23 |
| DNA double-strand break processing | 1 | comp23 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| positive regulation of transcription by RNA polymerase II | 5 | comp24 |
| defense response to Gram-negative bacterium | 4 | comp24 |
| positive regulation of gene expression | 3 | comp24 |
| proteolysis | 3 | comp24 |
| regulation of transcription by RNA polymerase II | 3 | comp24 |
| response to bacterium | 3 | comp24 |
| DNA repair | 2 | comp24 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp24 |
| blastocyst development | 2 | comp24 |
| cell surface receptor signaling pathway | 2 | comp24 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| defense response to Gram-negative bacterium | 3 | comp25 |
| positive regulation of transcription by RNA polymerase II | 3 | comp25 |
| actin cytoskeleton organization | 2 | comp25 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp25 |
| cell surface receptor signaling pathway | 2 | comp25 |
| flagellated sperm motility | 2 | comp25 |
| mRNA processing | 2 | comp25 |
| negative regulation of apoptotic process | 2 | comp25 |
| negative regulation of transcription by RNA polymerase II | 2 | comp25 |
| proteolysis | 2 | comp25 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| positive regulation of transcription by RNA polymerase II | 5 | comp26 |
| positive regulation of gene expression | 4 | comp26 |
| defense response to Gram-negative bacterium | 3 | comp26 |
| innate immune response | 3 | comp26 |
| mRNA processing | 3 | comp26 |
| proteolysis | 3 | comp26 |
| regulation of transcription by RNA polymerase II | 3 | comp26 |
| signal transduction | 3 | comp26 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp26 |
| cell division | 2 | comp26 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| proteolysis | 4 | comp27 |
| defense response to Gram-negative bacterium | 3 | comp27 |
| innate immune response | 3 | comp27 |
| mRNA processing | 3 | comp27 |
| positive regulation of gene expression | 3 | comp27 |
| positive regulation of transcription by RNA polymerase II | 3 | comp27 |
| signal transduction | 3 | comp27 |
| DNA repair | 2 | comp27 |
| RNA splicing | 2 | comp27 |
| actin cytoskeleton organization | 2 | comp27 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| positive regulation of transcription by RNA polymerase II | 5 | comp28 |
| defense response to Gram-negative bacterium | 3 | comp28 |
| gene expression | 3 | comp28 |
| innate immune response | 3 | comp28 |
| positive regulation of gene expression | 3 | comp28 |
| signal transduction | 3 | comp28 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp28 |
| basement membrane organization | 2 | comp28 |
| cell adhesion | 2 | comp28 |
| cell division | 2 | comp28 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| proteolysis | 4 | comp29 |
| innate immune response | 3 | comp29 |
| positive regulation of gene expression | 3 | comp29 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp29 |
| cell adhesion | 2 | comp29 |
| cell surface receptor signaling pathway | 2 | comp29 |
| defense response to Gram-negative bacterium | 2 | comp29 |
| extracellular matrix organization | 2 | comp29 |
| in utero embryonic development | 2 | comp29 |
| positive regulation of interleukin-6 production | 2 | comp29 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 3 | comp30 |
| cell surface receptor signaling pathway | 3 | comp30 |
| negative regulation of apoptotic process | 3 | comp30 |
| negative regulation of transcription by RNA polymerase II | 3 | comp30 |
| positive regulation of gene expression | 3 | comp30 |
| positive regulation of transcription by RNA polymerase II | 3 | comp30 |
| regulation of transcription by RNA polymerase II | 3 | comp30 |
| response to lipopolysaccharide | 3 | comp30 |
| NADPH regeneration | 2 | comp30 |
| basement membrane organization | 2 | comp30 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| visual perception | 4 | comp31 |
| defense response to Gram-negative bacterium | 3 | comp31 |
| negative regulation of transcription by RNA polymerase II | 3 | comp31 |
| positive regulation of transcription by RNA polymerase II | 3 | comp31 |
| proteolysis | 3 | comp31 |
| actin cytoskeleton organization | 2 | comp31 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp31 |
| cell surface receptor signaling pathway | 2 | comp31 |
| flagellated sperm motility | 2 | comp31 |
| gene expression | 2 | comp31 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| proteolysis | 4 | comp32 |
| in utero embryonic development | 3 | comp32 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp32 |
| cell surface receptor signaling pathway | 2 | comp32 |
| flagellated sperm motility | 2 | comp32 |
| protein homotetramerization | 2 | comp32 |
| signal transduction | 2 | comp32 |
| visual perception | 2 | comp32 |
| CDP biosynthetic process | 1 | comp32 |
| CMP catabolic process | 1 | comp32 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| cilium assembly | 4 | comp33 |
| Notch signaling pathway | 2 | comp33 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 2 | comp33 |
| cell differentiation | 2 | comp33 |
| cell projection organization | 2 | comp33 |
| cell surface receptor signaling pathway | 2 | comp33 |
| defense response to Gram-negative bacterium | 2 | comp33 |
| flagellated sperm motility | 2 | comp33 |
| heart looping | 2 | comp33 |
| negative regulation of transforming growth factor beta receptor signaling pathway | 2 | comp33 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| heart development | 4 | comp34 |
| negative regulation of apoptotic process | 4 | comp34 |
| adenylate cyclase-activating G protein-coupled receptor signaling pathway | 3 | comp34 |
| basement membrane organization | 3 | comp34 |
| gene expression | 3 | comp34 |
| negative regulation of transcription by RNA polymerase II | 3 | comp34 |
| response to lipopolysaccharide | 3 | comp34 |
| response to xenobiotic stimulus | 3 | comp34 |
| Notch signaling pathway | 2 | comp34 |
| axon guidance | 2 | comp34 |
### Top 10 Terms
| term | count | component |
|---|---|---|
| positive regulation of gene expression | 4 | comp35 |
| positive regulation of transcription by RNA polymerase II | 4 | comp35 |
| defense response to Gram-negative bacterium | 3 | comp35 |
| gene expression | 3 | comp35 |
| immune system process | 3 | comp35 |
| innate immune response | 3 | comp35 |
| proteolysis | 3 | comp35 |
| regulation of transcription by RNA polymerase II | 3 | comp35 |
| signal transduction | 3 | comp35 |
| cell adhesion | 2 | comp35 |
# Combine all results into one cross-component summary (plot, CSV, table).
#
# NOTE: each element of all_results holds only the top 20 terms of its
# component (see process_file()), so the totals below are computed over those
# truncated lists. n_components is therefore the number of components in
# which a term made the top 20, not the number in which it occurs at all.
if (length(all_results) > 0) {
  combined_data <- bind_rows(all_results)

  # Get overall most common terms
  overall_counts <- combined_data %>%
    group_by(term) %>%
    summarise(
      total_count = sum(count),
      n_components = n(),
      .groups = "drop"
    ) %>%
    arrange(desc(total_count)) %>%
    head(30)

  # Create overall summary plot: bar length is the summed count, the red
  # label next to each bar is n_components.
  p_summary <- ggplot(overall_counts, aes(x = reorder(term, total_count), y = total_count)) +
    geom_col(fill = "darkblue", alpha = 0.7) +
    geom_text(aes(label = n_components), hjust = -0.3, size = 3, color = "red") +
    coord_flip() +
    labs(
      title = "Top 30 Biological Process GO Terms Across All Rank 35 Components",
      subtitle = "Red numbers indicate how many components list each term among their top 20",
      x = "GO Biological Process Term",
      y = "Total Frequency Across All Components"
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title = element_text(size = 16, face = "bold"),
      plot.subtitle = element_text(size = 12, color = "gray40"),
      axis.text.y = element_text(size = 10),
      axis.text.x = element_text(size = 10),
      panel.grid.major.y = element_blank(),
      panel.grid.minor = element_blank()
    )

  print(p_summary)

  # Save summary plot
  ggsave(
    filename = file.path(output_dir, "rank35_ALL_components_BP_GO_summary.png"),
    plot = p_summary,
    width = 14,
    height = 12,
    dpi = 300
  )

  # Save summary table
  write_csv(
    overall_counts,
    file.path(output_dir, "rank35_BP_GO_summary_table.csv")
  )

  cat("\n### Top 30 GO BP Terms Summary\n\n")
  print(knitr::kable(overall_counts, row.names = FALSE))
}
##
## ### Top 30 GO BP Terms Summary
##
##
##
## |term | total_count| n_components|
## |:-------------------------------------------------------------------------|-----------:|------------:|
## |innate immune response | 80| 27|
## |proteolysis | 79| 25|
## |positive regulation of gene expression | 78| 27|
## |defense response to Gram-negative bacterium | 76| 30|
## |positive regulation of transcription by RNA polymerase II | 76| 21|
## |adenylate cyclase-activating G protein-coupled receptor signaling pathway | 69| 33|
## |cell surface receptor signaling pathway | 59| 29|
## |gene expression | 59| 25|
## |signal transduction | 59| 21|
## |negative regulation of transcription by RNA polymerase II | 47| 18|
## |negative regulation of apoptotic process | 46| 19|
## |basement membrane organization | 36| 17|
## |cilium assembly | 36| 16|
## |actin cytoskeleton organization | 31| 16|
## |cell adhesion | 29| 11|
## |response to bacterium | 28| 11|
## |response to lipopolysaccharide | 28| 11|
## |regulation of transcription by RNA polymerase II | 28| 9|
## |flagellated sperm motility | 26| 12|
## |mRNA processing | 25| 11|
## |cell division | 24| 9|
## |positive regulation of reactive oxygen species biosynthetic process | 24| 12|
## |visual perception | 22| 9|
## |negative regulation of tumor necrosis factor production | 20| 10|
## |in utero embryonic development | 18| 8|
## |DNA repair | 15| 10|
## |Notch signaling pathway | 13| 8|
## |cell differentiation | 13| 6|
## |blastocyst development | 12| 6|
## |defense response to bacterium | 12| 6|
# Heatmap of per-component counts for the overall top terms
# (rows = terms, columns = components, fill = count).
if (length(all_results) > 0) {
  # Get top 30 overall terms, most frequent first. head() copes with fewer
  # than 30 rows; as.character() keeps plain names even if `term` is a factor.
  top_terms <- as.character(head(overall_counts$term, 30))

  # Create matrix of term counts by component; a term absent from a
  # component's list gets 0.
  heatmap_data <- combined_data %>%
    filter(term %in% top_terms) %>%
    select(component, term, count) %>%
    pivot_wider(names_from = component, values_from = count, values_fill = 0)

  # Convert to matrix format for better visualization
  heatmap_matrix <- heatmap_data %>%
    column_to_rownames("term") %>%
    as.matrix()

  # Reorder by component number. drop = FALSE keeps a matrix even when only
  # one component is present; otherwise the result collapses to a vector and
  # the reshaping below fails.
  comp_order <- str_sort(colnames(heatmap_matrix), numeric = TRUE)
  heatmap_matrix <- heatmap_matrix[, comp_order, drop = FALSE]

  # Convert back for ggplot
  heatmap_long <- heatmap_matrix %>%
    as.data.frame() %>%
    rownames_to_column("term") %>%
    pivot_longer(-term, names_to = "component", values_to = "count")

  # Order terms by total frequency (reversed so the most frequent term is
  # drawn at the top of the y axis)
  term_order <- top_terms
  heatmap_long$term <- factor(heatmap_long$term, levels = rev(term_order))

  # Order components numerically
  heatmap_long <- heatmap_long %>%
    mutate(comp_num = as.numeric(str_extract(component, "\\d+"))) %>%
    arrange(comp_num)
  heatmap_long$component <- factor(
    heatmap_long$component,
    levels = unique(heatmap_long$component)
  )

  # Create heatmap. `linewidth` replaces `size` for the tile border: using
  # `size` for line widths is deprecated since ggplot2 3.4.0.
  p_heatmap <- ggplot(heatmap_long, aes(x = component, y = term, fill = count)) +
    geom_tile(color = "white", linewidth = 0.5) +
    scale_fill_gradient2(
      low = "white", mid = "lightblue", high = "darkblue",
      midpoint = max(heatmap_long$count) / 2,
      name = "Count"
    ) +
    labs(
      title = "GO BP Terms Distribution Across Rank 35 Components",
      subtitle = "Top 30 most frequent terms",
      x = "Component",
      y = "GO Biological Process Term"
    ) +
    theme_minimal(base_size = 10) +
    theme(
      plot.title = element_text(size = 16, face = "bold"),
      plot.subtitle = element_text(size = 12, color = "gray40"),
      axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 8),
      axis.text.y = element_text(size = 9),
      panel.grid = element_blank(),
      legend.position = "right"
    )

  print(p_heatmap)

  # Save heatmap
  ggsave(
    filename = file.path(output_dir, "rank35_BP_GO_heatmap.png"),
    plot = p_heatmap,
    width = 16,
    height = 12,
    dpi = 300
  )
}
# Print the R version, platform and attached/loaded package versions used for
# this run, so the report can be reproduced.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.7.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] C
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scales_1.3.0 RColorBrewer_1.1-3 lubridate_1.9.3 forcats_1.0.0
## [5] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
## [9] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3 stringi_1.8.4
## [5] hms_1.1.3 digest_0.6.37 magrittr_2.0.3 evaluate_1.0.0
## [9] grid_4.3.2 timechange_0.3.0 fastmap_1.2.0 jsonlite_1.8.9
## [13] fansi_1.0.6 textshaping_0.4.0 jquerylib_0.1.4 cli_3.6.3
## [17] rlang_1.1.4 crayon_1.5.3 bit64_4.5.2 munsell_0.5.1
## [21] withr_3.0.1 cachem_1.1.0 yaml_2.3.10 tools_4.3.2
## [25] parallel_4.3.2 tzdb_0.4.0 colorspace_2.1-1 vctrs_0.6.5
## [29] R6_2.5.1 lifecycle_1.0.4 bit_4.5.0 vroom_1.6.5
## [33] ragg_1.3.3 pkgconfig_2.0.3 pillar_1.9.0 bslib_0.8.0
## [37] gtable_0.3.6 glue_1.8.0 systemfonts_1.1.0 highr_0.11
## [41] xfun_0.48 tidyselect_1.2.1 knitr_1.48 farver_2.1.2
## [45] htmltools_0.5.8.1 rmarkdown_2.28 labeling_0.4.3 compiler_4.3.2