---
title: "05.1-expressed-mRNA-top10-GOslim-GO-counts"
author: "Sam White"
date: "2025-07-09"
output: 
  github_document:
    toc: true
    number_sections: true
  bookdown::html_document2:
    theme: cosmo
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
  html_document:
    theme: cosmo
    toc: true
    toc_float: true
    number_sections: true
    code_folding: show
    code_download: true
---

# BACKGROUND

Identify and plot the Top 10 GOslims by number of GO term assignments across
all three species. The Top 10 are selected within each species; the union of
those terms is then plotted for every species so that counts can be compared
side by side.

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  echo = TRUE,     # Display code chunks
  eval = TRUE,     # Evaluate code chunks
  warning = FALSE, # Hide warnings
  message = FALSE, # Hide messages
  comment = ""     # Prevents appending '##' to beginning of lines in code output
)
library(tidyverse)
```

```{r}
# Define file paths and species names.
# List names are the species labels (used for the `Species` column and the
# plot legend); values are the per-species GOslim count tables. The code below
# relies on each table providing `Term` and `Count` columns.
files <- list(
  "D-Apul" = "~/gitrepos/urol-e5/deep-dive-expression/D-Apul/output/30.00-Apul-transcriptome-GOslims/GOslim-counts.tsv",
  "E-Peve" = "~/gitrepos/urol-e5/deep-dive-expression/E-Peve/output/30.00-Peve-transcriptome-GOslims/GOslim-counts.tsv",
  "F-Ptuh" = "~/gitrepos/urol-e5/deep-dive-expression/F-Ptuh/output/30.00-Ptua-transcriptome-GOslims/GOslim-counts.tsv"
)

# Read and combine data.
# `imap()` hands each path to the reader together with its list name, so the
# species label does not need to be passed as a separate vector. Each table is
# tagged with its species before all tables are stacked into one tibble.
goslim_data <- files %>%
  imap(function(path, species) {
    read_tsv(path, show_col_types = FALSE) %>%
      mutate(Species = species)
  }) %>%
  bind_rows()

# Find the top 10 Terms by Count for each species.
# `with_ties = FALSE` caps every species at exactly 10 rows; `distinct()` then
# collapses the per-species lists into the union of selected Terms.
top_terms <- goslim_data %>%
  group_by(Species) %>%
  slice_max(order_by = Count, n = 10, with_ties = FALSE) %>%
  ungroup() %>%
  distinct(Term)

# Filter the data to only include these Terms.
# Rows are kept for every species, including species where the Term was not
# in that species' own top 10, so each Term can be compared across species.
goslim_top <- goslim_data %>%
  filter(Term %in% top_terms$Term)

# Plot: Term on Y, Count on X.
# `fct_reorder()` orders the Terms by Count (its default summary over the
# rows of each Term is the median). `geom_col()` draws bars whose length is
# the value in the data, i.e. the same as `geom_bar(stat = "identity")`.
ggplot(
  goslim_top,
  aes(y = fct_reorder(Term, Count), x = Count, fill = Species)
) +
  geom_col(position = position_dodge(width = 0.8)) +
  labs(
    title = "Top GOslim Term Counts by Species",
    y = "GOslim Term",
    x = "Count"
  ) +
  theme_bw() +
  theme(
    axis.text.y = element_text(size = 8),
    plot.title = element_text(hjust = 0.5)
  )
```