---
title: "17-uniprot_summaries"
output: html_document
date: "2025-04-02"
---
```{r}
# Notebook setup: expose the NCBI API key through the ENTREZ_KEY environment
# variable and load rentrez.
# NOTE(review): a credential is hard-coded here. Prefer defining ENTREZ_KEY in
# ~/.Renviron (outside version control) and rotating this key.
Sys.setenv(ENTREZ_KEY = "8f817031916b3cfee7ec443b712c426d9a08")

# Install rentrez only when it is missing, so knitting the notebook does not
# reinstall the package (and require a CRAN mirror) on every run.
if (!requireNamespace("rentrez", quietly = TRUE)) {
  install.packages("rentrez")
}
library(rentrez)
```
```{r}
#' Fetch NCBI Gene summaries for a set of UniProt accessions
#'
#' Each accession is used as a search term against the NCBI "gene" database.
#' The summary text of the first hit is returned. Lookups that fail are
#' recorded in the result instead of being dropped.
#'
#' @param uniprot_ids Character vector of UniProt accessions. Duplicates are
#'   queried once (first occurrence wins).
#' @param email Accepted for backward compatibility; it is not used by the
#'   Entrez calls made here.
#' @param api_key NCBI API key passed to every Entrez request.
#'   NOTE(review): the default is a hard-coded credential; prefer supplying
#'   the key from an environment variable kept out of version control.
#' @param verbose If `TRUE`, report each accession -> Gene ID mapping with
#'   `message()`.
#' @return A named list with one character element per unique accession:
#'   the gene summary, `"No match found"`, `NA` when the record has no
#'   summary field, or `"Error: <message>"` when the lookup failed.
fetch_ncbi_summary <- function(uniprot_ids, email,
                               api_key = "8f817031916b3cfee7ec443b712c426d9a08",
                               verbose = TRUE) {
  entrez_db <- "gene"
  ids <- unique(as.character(uniprot_ids))

  # Resolve one accession; any error propagates to the tryCatch() below.
  fetch_one <- function(uniprot_id) {
    # Nothing sensible to search for: skip the network round trip.
    if (is.na(uniprot_id) || !nzchar(trimws(uniprot_id))) {
      return("No match found")
    }

    # Search for the UniProt accession in the NCBI Gene database
    search_results <- entrez_search(
      db = entrez_db, term = uniprot_id, retmax = 1, api_key = api_key
    )
    if (length(search_results$ids) == 0) {
      return("No match found")
    }

    ncbi_id <- search_results$ids[1]
    if (verbose) {
      message(uniprot_id, " -> NCBI Gene ID ", ncbi_id)
    }

    # Fetch the summary for the retrieved NCBI ID
    summary_results <- entrez_summary(
      db = entrez_db, id = ncbi_id, api_key = api_key
    )
    gene_summary <- summary_results$summary

    # Assigning NULL into a list drops the element, so map a missing summary
    # to NA to keep exactly one entry per accession.
    if (is.null(gene_summary) || length(gene_summary) == 0) {
      return(NA_character_)
    }
    paste(as.character(gene_summary), collapse = " ")
  }

  # The handler *returns* the error text. Assigning into `summaries` from
  # inside the handler would only modify a copy local to that function.
  summaries <- lapply(ids, function(uniprot_id) {
    tryCatch(
      fetch_one(uniprot_id),
      error = function(e) paste("Error:", conditionMessage(e))
    )
  })

  stats::setNames(summaries, ids)
}
```
### GOW
```{r}
# Annotate the GOW top-50 gene list with NCBI Gene summaries and save as CSV.
top_genes <- read.csv("/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/Top_50_genes/GOW_topgenes.csv")

# Fail loudly if the accession column is absent; `$` would otherwise return
# NULL and an empty CSV would be written without any warning.
stopifnot("uniprot_accession" %in% names(top_genes))
uniprot_ids <- as.character(top_genes$uniprot_accession)
email <- "sgleuch@uw.edu" # Replace with your email

# NOTE: the former set_entrez_key(email) call was removed. set_entrez_key()
# stores its argument as the NCBI API key (ENTREZ_KEY), so passing an e-mail
# address replaced the real key with an invalid one.
GOW_summaries <- fetch_ncbi_summary(uniprot_ids, email)
GOW_topgene_summs <- data.frame(
  UniProt_ID = names(GOW_summaries),
  Summary = unlist(GOW_summaries),
  stringsAsFactors = FALSE
)

# Write to CSV
write.csv(GOW_topgene_summs, "/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/GOW_topgene_summs.csv", row.names = FALSE)
```
### FOW
```{r}
# Annotate the FOW top-50 gene list with NCBI Gene summaries and save as CSV.
top_genes <- read.csv("/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/Top_50_genes/FOW_topgenes.csv")

# Fail loudly if the accession column is absent; `$` would otherwise return
# NULL and an empty CSV would be written without any warning.
stopifnot("uniprot_accession" %in% names(top_genes))
uniprot_ids <- as.character(top_genes$uniprot_accession)
email <- "sgleuch@uw.edu" # Replace with your email

# NOTE: the former set_entrez_key(email) call was removed. set_entrez_key()
# stores its argument as the NCBI API key (ENTREZ_KEY), so passing an e-mail
# address replaced the real key with an invalid one.
summaries <- fetch_ncbi_summary(uniprot_ids, email)
topgene_summs <- data.frame(
  UniProt_ID = names(summaries),
  Summary = unlist(summaries),
  stringsAsFactors = FALSE
)

# Write to CSV
write.csv(topgene_summs, "/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/FOW_topgene_summs.csv", row.names = FALSE)
```
### GOA
```{r}
# Annotate the GOA top-50 gene list with NCBI Gene summaries and save as CSV.
top_genes <- read.csv("/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/Top_50_genes/GOA_topgenes.csv")

# Fail loudly if the accession column is absent; `$` would otherwise return
# NULL and an empty CSV would be written without any warning.
stopifnot("uniprot_accession" %in% names(top_genes))
uniprot_ids <- as.character(top_genes$uniprot_accession)
email <- "sgleuch@uw.edu" # Replace with your email

# NOTE: the former set_entrez_key(email) call was removed. set_entrez_key()
# stores its argument as the NCBI API key (ENTREZ_KEY), so passing an e-mail
# address replaced the real key with an invalid one.
summaries <- fetch_ncbi_summary(uniprot_ids, email)
topgene_summs <- data.frame(
  UniProt_ID = names(summaries),
  Summary = unlist(summaries),
  stringsAsFactors = FALSE
)

# Write to CSV
write.csv(topgene_summs, "/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/GOA_topgene_summs.csv", row.names = FALSE)
```
### FOA
```{r}
# Annotate the FOA top-50 gene list with NCBI Gene summaries and save as CSV.
top_genes <- read.csv("/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/Top_50_genes/FOA_topgenes.csv")

# Fail loudly if the accession column is absent; `$` would otherwise return
# NULL and an empty CSV would be written without any warning.
stopifnot("uniprot_accession" %in% names(top_genes))
uniprot_ids <- as.character(top_genes$uniprot_accession)
email <- "sgleuch@uw.edu" # Replace with your email

# NOTE: the former set_entrez_key(email) call was removed. set_entrez_key()
# stores its argument as the NCBI API key (ENTREZ_KEY), so passing an e-mail
# address replaced the real key with an invalid one.
summaries <- fetch_ncbi_summary(uniprot_ids, email)
topgene_summs <- data.frame(
  UniProt_ID = names(summaries),
  Summary = unlist(summaries),
  stringsAsFactors = FALSE
)

# Write to CSV
write.csv(topgene_summs, "/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/FOA_topgene_summs.csv", row.names = FALSE)
```
### GDO
```{r}
# Annotate the GDO top-50 gene list with NCBI Gene summaries and save as CSV.
top_genes <- read.csv("/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/Top_50_genes/GDO_topgenes.csv")

# Fail loudly if the accession column is absent; `$` would otherwise return
# NULL and an empty CSV would be written without any warning.
stopifnot("uniprot_accession" %in% names(top_genes))
uniprot_ids <- as.character(top_genes$uniprot_accession)
email <- "sgleuch@uw.edu" # Replace with your email

# NOTE: the former set_entrez_key(email) call was removed. set_entrez_key()
# stores its argument as the NCBI API key (ENTREZ_KEY), so passing an e-mail
# address replaced the real key with an invalid one.
summaries <- fetch_ncbi_summary(uniprot_ids, email)
topgene_summs <- data.frame(
  UniProt_ID = names(summaries),
  Summary = unlist(summaries),
  stringsAsFactors = FALSE
)

# Write to CSV
write.csv(topgene_summs, "/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/GDO_topgene_summs.csv", row.names = FALSE)
```
### FDO
```{r}
# Annotate the FDO top-50 gene list with NCBI Gene summaries and save as CSV.
top_genes <- read.csv("/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/Top_50_genes/FDO_topgenes.csv")

# Fail loudly if the accession column is absent; `$` would otherwise return
# NULL and an empty CSV would be written without any warning.
stopifnot("uniprot_accession" %in% names(top_genes))
uniprot_ids <- as.character(top_genes$uniprot_accession)
email <- "sgleuch@uw.edu" # Replace with your email

# NOTE: the former set_entrez_key(email) call was removed. set_entrez_key()
# stores its argument as the NCBI API key (ENTREZ_KEY), so passing an e-mail
# address replaced the real key with an invalid one.
summaries <- fetch_ncbi_summary(uniprot_ids, email)
topgene_summs <- data.frame(
  UniProt_ID = names(summaries),
  Summary = unlist(summaries),
  stringsAsFactors = FALSE
)

# Write to CSV
write.csv(topgene_summs, "/home/shared/8TB_HDD_02/graceleuchtenberger/Github/byssus-exp-analysis/output/Top_gene_summaries/FDO_topgene_summs.csv", row.names = FALSE)
```
```{r}
# Smoke test: search the NCBI "gene" database for one UniProt accession and
# show the ID(s) that come back (at most one, because retmax = 1).
test_result <- entrez_search(
  db = "gene",
  term = "Q9WVR6",
  retmax = 1,
  api_key = "8f817031916b3cfee7ec443b712c426d9a08"
)
print(test_result$ids)
```
```{r}
# Smoke test: request the summary record for a single, fixed Gene ID.
summary_results <- entrez_summary(
  db = "gene",
  id = "84551",
  api_key = "8f817031916b3cfee7ec443b712c426d9a08"
)
```