0.1 1. Load and Clean Data

# Read the three tab-delimited immune shortlist files. Text columns are kept
# as character vectors rather than being converted to factors.
Phel <- read.delim(
  file = "../output/18-annot-pycno/Phel_immune_shortlist.txt",
  stringsAsFactors = FALSE
)
Pisa <- read.delim(
  file = "../output/19-annot-pisaster/Pisaster_immune_shortlist.txt",
  stringsAsFactors = FALSE
)
Derm <- read.delim(
  file = "../output/20-annot-derm/Derm_immune_shortlist.txt",
  stringsAsFactors = FALSE
)

# Collapse each shortlist to one row per Entry: the first row seen for an
# Entry value is kept, together with all of its other columns.
Phel_unique <- distinct(Phel, Entry, .keep_all = TRUE)
Pisa_unique <- distinct(Pisa, Entry, .keep_all = TRUE)
Derm_unique <- distinct(Derm, Entry, .keep_all = TRUE)

# Keep only the entries present in all three deduplicated shortlists.
# The inner join of Derm with Phel runs first, so column names shared by those
# two tables receive dplyr's default ".x" (Derm) and ".y" (Phel) suffixes.
# Pisa is joined afterwards; its columns no longer clash with the suffixed
# names and therefore stay unsuffixed. The column selection further down
# depends on exactly this naming.
joined_immune <- inner_join(
  inner_join(Derm_unique, Phel_unique, by = "Entry"),
  Pisa_unique,
  by = "Entry"
)

0.2 2. Select, Rename, and Compute Differences

# Build the cross-source comparison table from the three-way join.
#
# Column provenance follows the join order: ".x" columns come from Derm,
# ".y" columns from Phel, and the unsuffixed statistics columns from Pisa.
# Each group is renamed with an explicit _Derm / _Phel / _Pisa suffix. The
# annotation columns (gene_name, Entry.Name, Protein.Names, Gene.Names, GO
# biological process) are taken from the Derm side only.
#
# mean_range is the spread (largest minus smallest) of the three per-source
# means, ignoring missing values. It is computed with vectorised
# pmax()/pmin() rather than rowwise() + c_across():
#   * every row with at least one non-missing mean gets the same value as
#     before, without per-row evaluation;
#   * a row whose three means are all NA now gets NA, which arrange() sorts
#     last, instead of -Inf plus "no non-missing arguments" warnings, which
#     would have ranked that row as the most similar entry.
selected_df <- joined_immune %>%
  select(
    Entry,
    gene_name.x,
    Entry.Name.x,
    Protein.Names.x,
    Gene.Names.x,
    Gene.Ontology.biological.process.x,
    starts_with("transcript_id.x"),
    starts_with("sum.x"),
    starts_with("mean.x"),
    starts_with("std.dev.x"),
    starts_with("std.error.x"),
    starts_with("transcript_id.y"),
    starts_with("sum.y"),
    starts_with("mean.y"),
    starts_with("std.dev.y"),
    starts_with("std.error.y"),
    transcript_id,
    sum,
    mean,
    std.dev,
    std.error
  ) %>%
  rename_with(~ gsub("\\.x$", "_Derm", .x), ends_with(".x")) %>%
  rename_with(~ gsub("\\.y$", "_Phel", .x), ends_with(".y")) %>%
  rename_with(
    ~ paste0(.x, "_Pisa"),
    c("transcript_id", "sum", "mean", "std.dev", "std.error")
  ) %>%
  mutate(
    mean_range = pmax(mean_Derm, mean_Phel, mean_Pisa, na.rm = TRUE) -
      pmin(mean_Derm, mean_Phel, mean_Pisa, na.rm = TRUE)
  ) %>%
  # rowwise() followed by ungroup() used to leave a tibble behind; keep that
  # class so printing and downstream handling are unchanged.
  as_tibble()

0.3 3. Identify Most Similar and Divergent Proteins

# Rows with the smallest mean_range: sort ascending and keep the first 20
# (fewer if the table has fewer rows).
most_equal <- head(arrange(selected_df, mean_range), 20)

# Rows with the largest mean_range: sort descending and keep the first 20
# (fewer if the table has fewer rows).
most_divergent <- head(arrange(selected_df, desc(mean_range)), 20)

0.4 4. Save CSVs

# Export both top-20 tables as CSV files without a row-name column.
# write.csv() does not create missing directories, so make sure the output
# folder exists first; dir.create() is a silent no-op when it already does.
compare_out_dir <- "../output/22.5-species-compare"
dir.create(compare_out_dir, recursive = TRUE, showWarnings = FALSE)

write.csv(
  most_equal,
  file.path(compare_out_dir, "most_equal_proteins.csv"),
  row.names = FALSE
)
write.csv(
  most_divergent,
  file.path(compare_out_dir, "most_divergent_proteins.csv"),
  row.names = FALSE
)

Download links:

0.5 5. Interactive Tables

0.5.1 Top 20 Most Similar Proteins

# Interactive table of the most_equal rows, reduced to the entry id, the
# Derm-side protein name and the three per-source means; 10 rows per page.
datatable(
  select(most_equal, Entry, Protein.Names_Derm, mean_Derm, mean_Phel, mean_Pisa),
  caption = "Top 20 Most Similar Proteins by Mean Expression",
  options = list(pageLength = 10)
)

0.5.2 Top 20 Most Divergent Proteins

# Interactive table of the most_divergent rows, reduced to the entry id, the
# Derm-side protein name and the three per-source means.
# most_divergent holds up to 20 rows (pageLength = 10 only sets how many are
# shown per page), so the caption says "Top 20", matching the plot title and
# the companion "most similar" table.
datatable(
  most_divergent %>%
    select(Entry, Protein.Names_Derm, mean_Derm, mean_Phel, mean_Pisa),
  options = list(pageLength = 10),
  caption = "Top 20 Most Divergent Proteins by Mean Expression"
)

0.6 6. Plot: Divergent Proteins

# Profile plot of the most_divergent rows. The three mean_* columns are
# stacked into long form (one row per Entry and source column), then each
# Entry is drawn as its own coloured line with points. The x axis shows the
# original column names (mean_Derm, mean_Phel, mean_Pisa). The legend is
# hidden because there is one colour per Entry.
ggplot(
  pivot_longer(
    select(most_divergent, Entry, mean_Derm, mean_Phel, mean_Pisa),
    cols = -Entry,
    names_to = "Species",
    values_to = "Mean"
  ),
  aes(x = Species, y = Mean, group = Entry, color = Entry)
) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    title = "Top 20 Most Divergent Proteins by Mean Expression",
    x = "Species",
    y = "Mean Expression"
  )