# Read the three per-species immune shortlists (tab-delimited text files).
Phel <- read.delim(
  file = "../output/18-annot-pycno/Phel_immune_shortlist.txt",
  stringsAsFactors = FALSE
)
Pisa <- read.delim(
  file = "../output/19-annot-pisaster/Pisaster_immune_shortlist.txt",
  stringsAsFactors = FALSE
)
Derm <- read.delim(
  file = "../output/20-annot-derm/Derm_immune_shortlist.txt",
  stringsAsFactors = FALSE
)

# Keep one row per Entry in each table (the first one encountered), with all
# of its columns, so the joins below cannot multiply rows.
Phel_unique <- distinct(Phel, Entry, .keep_all = TRUE)
Pisa_unique <- distinct(Pisa, Entry, .keep_all = TRUE)
Derm_unique <- distinct(Derm, Entry, .keep_all = TRUE)

# Retain only the Entry values present in all three tables. Derm is joined to
# Phel first, so column names shared by those two receive dplyr's default
# ".x" (Derm) / ".y" (Phel) suffixes; Pisa is joined last.
joined_immune <- inner_join(
  inner_join(Derm_unique, Phel_unique, by = "Entry"),
  Pisa_unique,
  by = "Entry"
)
# Build the per-Entry comparison table from the three-way join.
#
# Columns suffixed ".x" come from Derm and ".y" from Phel (the suffixes added
# by the first inner_join); the unsuffixed transcript_id / sum / mean /
# std.dev / std.error columns are the ones contributed by Pisa in the second
# join. After selection they are renamed to *_Derm, *_Phel and *_Pisa.
#
# mean_range is the spread (largest minus smallest) of the three species
# means for each Entry, ignoring NA values.
selected_df <- joined_immune %>%
  select(
    Entry,
    gene_name.x,
    Entry.Name.x,
    Protein.Names.x,
    Gene.Names.x,
    Gene.Ontology.biological.process.x,
    starts_with("transcript_id.x"),
    starts_with("sum.x"),
    starts_with("mean.x"),
    starts_with("std.dev.x"),
    starts_with("std.error.x"),
    starts_with("transcript_id.y"),
    starts_with("sum.y"),
    starts_with("mean.y"),
    starts_with("std.dev.y"),
    starts_with("std.error.y"),
    transcript_id,
    sum,
    mean,
    std.dev,
    std.error
  ) %>%
  # The function argument is called `nm` so it cannot be confused with the
  # ".x" / ".y" join suffixes that are being replaced.
  rename_with(function(nm) sub("\\.x$", "_Derm", nm), ends_with(".x")) %>%
  rename_with(function(nm) sub("\\.y$", "_Phel", nm), ends_with(".y")) %>%
  rename_with(
    function(nm) paste0(nm, "_Pisa"),
    c("transcript_id", "sum", "mean", "std.dev", "std.error")
  ) %>%
  mutate(
    # Vectorised row-wise max/min. Unlike max()/min() with na.rm = TRUE,
    # pmax()/pmin() return NA (not -Inf/Inf plus a warning) when all three
    # means are missing, so such rows get mean_range = NA and arrange() sorts
    # them last instead of ranking them as the "most equal" proteins.
    mean_range =
      pmax(mean_Derm, mean_Phel, mean_Pisa, na.rm = TRUE) -
      pmin(mean_Derm, mean_Phel, mean_Pisa, na.rm = TRUE)
  ) %>%
  # Keep the result a tibble, as the previous rowwise() %>% ungroup() did.
  as_tibble()
# The 20 entries with the smallest spread between species means.
most_equal <- selected_df %>%
  arrange(mean_range) %>%
  slice_head(n = 20)

# The 20 entries with the largest spread between species means.
most_divergent <- selected_df %>%
  arrange(desc(mean_range)) %>%
  slice_head(n = 20)

# write.csv() does not create missing directories, so make sure the output
# folder exists first (a no-op when it is already there).
dir.create(
  "../output/22.5-species-compare",
  showWarnings = FALSE,
  recursive = TRUE
)

write.csv(
  most_equal,
  "../output/22.5-species-compare/most_equal_proteins.csv",
  row.names = FALSE
)
write.csv(
  most_divergent,
  "../output/22.5-species-compare/most_divergent_proteins.csv",
  row.names = FALSE
)
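# Download links: most_equal_proteins.csv and most_divergent_proteins.csv (written above to ../output/22.5-species-compare/)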
# Table of the most_equal set: Entry, the Derm protein name and the three
# per-species means, shown 10 rows per page.
most_equal %>%
  select(Entry, Protein.Names_Derm, mean_Derm, mean_Phel, mean_Pisa) %>%
  datatable(
    caption = "Top 20 Most Similar Proteins by Mean Expression",
    options = list(pageLength = 10)
  )
# Table of the most_divergent set: Entry, the Derm protein name and the three
# per-species means, shown 10 rows per page. most_divergent holds up to 20
# rows, so the caption says "Top 20" (matching the plot title for the same
# data) rather than the page length.
datatable(
  most_divergent %>%
    select(Entry, Protein.Names_Derm, mean_Derm, mean_Phel, mean_Pisa),
  options = list(pageLength = 10),
  caption = "Top 20 Most Divergent Proteins by Mean Expression"
)
# Line-and-point plot of the three per-species means for each entry in
# most_divergent. The means are reshaped to long form (one row per
# Entry/species-mean column) so each Entry is drawn as its own coloured line;
# the legend is hidden.
ggplot(
  data = pivot_longer(
    select(most_divergent, Entry, mean_Derm, mean_Phel, mean_Pisa),
    cols = -Entry,
    names_to = "Species",
    values_to = "Mean"
  ),
  mapping = aes(x = Species, y = Mean, group = Entry, color = Entry)
) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  ggtitle("Top 20 Most Divergent Proteins by Mean Expression") +
  xlab("Species") +
  ylab("Mean Expression") +
  # Must follow theme_minimal(), which would otherwise reset legend.position.
  theme(legend.position = "none")