---
title: "08-join-annotations"
output: html_document
date: "2023-12-14"
---
```{r setup, include=FALSE}
# Set the default chunk option so each chunk's source code is echoed in the
# knitted document.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
# do once
# install.packages("R.utils")
library(tidyverse)
library(R.utils)
```
## Annotating our top DEGs using BLAST and UniProt Gene Ontology (GO) IDs
```{r}
# Load the differential expression (DEG) results; the file is space-delimited
# and has a header row.
DEG <- read.csv(file = "../output/1213-DEG.tab", header = TRUE, sep = " ")
head(DEG)
```
```{r}
# Copy the row names of the DEG table into a regular "genes" column so the
# table can later be merged on that column.
genenames <- row.names(DEG)
DEG[["genes"]] <- genenames
head(DEG)
```
```{r}
# Load the BLAST results table. The file has no header row, so the columns
# get the default names V1, V2, ...
blast <- read.delim(file = "../output/GCF_026571515.1.tab", header = FALSE)
head(blast)
```
```{r}
# Rename the first BLAST column to "genes" so both tables share the merge key.
# (Alternatively, leave the name alone and pass "by.x"/"by.y" to merge().)
names(blast)[1L] <- "genes"
head(blast)
```
```{r}
# Join the DEG table to the BLAST hits on the shared "genes" column, then save
# the full joined table (write_delim() writes space-delimited by default).
DEG_Blast <- merge(DEG, blast, by.x = "genes", by.y = "genes")
head(DEG_Blast)
write_delim(DEG_Blast, "../output/DEG_Blast_Full_Results.tab", delim = " ")
```
```{r}
# Build a compact annotation table holding only the gene id, the BLAST subject
# id (column V2) and the differential expression statistics of interest.
DEG_annot <- data.frame(
  geneID = DEG_Blast$genes,
  geneName = DEG_Blast$V2,
  baseMean = DEG_Blast$baseMean,
  log2FoldChange = DEG_Blast$log2FoldChange,
  pvalue = DEG_Blast$pvalue,
  padj = DEG_Blast$padj
)
head(DEG_annot)
```
```{r}
# Extract the UniProt accession from each BLAST subject id for the UniProt
# ID lookup. Subject ids look like "sp|S8FGV1|LAC..." (db|accession|entry
# name), so the accession is the second "|"-delimited field:
# sp|S8FGV1|LAC... becomes just S8FGV1.
#
# The field is pulled out by delimiter rather than by fixed character
# positions because UniProt accessions are either 6 or 10 characters long; a
# fixed substr(x, 4, 9) would silently truncate the 10-character ones and they
# would then fail to match in the GO join. Ids that do not have the
# db|accession|name shape are left unchanged.
uniprot_id <- sub("^[^|]*\\|([^|]*)\\|.*$", "\\1", DEG_annot$geneName)
head(uniprot_id)
# add this vector of UniProt ids to the dataframe
DEG_annot$uniprot_id <- uniprot_id
head(DEG_annot)
# Write the ids, one per line, to a text file for upload to UniProt (copy and
# paste would also work, but importing a file makes it easier to be sure that
# every id was included).
write.table(
  uniprot_id,
  "../output/uniprot_id.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)
```
```{r}
# The UniProt id -> GO mapping was retrieved from the web interface at
# https://www.uniprot.org/ as a gzipped TSV; decompress and import it here.
#
# R.utils::gunzip() by default deletes the .gz after decompressing and errors
# when the destination file already exists, so calling it unconditionally makes
# this chunk fail on every re-run after the first. Only decompress when the
# .tsv is missing, and keep the original archive.
go_gz_path <- "../data/idmapping_2024_01_04.tsv.gz"
go_tsv_path <- sub("\\.gz$", "", go_gz_path)
if (!file.exists(go_tsv_path)) {
  gunzip(go_gz_path, destname = go_tsv_path, remove = FALSE)
}
GO_id <- read.csv(go_tsv_path, sep = "\t", header = TRUE, row.names = NULL)
head(GO_id)
```
```{r}
# Attach the GO information to the DEG annotation table, matching our
# "uniprot_id" column against the "Entry" column of the UniProt table.
clam_GO_annotations <- merge(
  x = DEG_annot,
  y = GO_id,
  by.x = "uniprot_id",
  by.y = "Entry"
)
head(clam_GO_annotations)
```
```{r}
# The joined table looks good, so save it as a CSV file.
write.csv(clam_GO_annotations, file = "../output/clam_GO_annotations.csv")
```