---
title: "14.1-joining-annotations"
output: html_document
---
Redoing with Spur blastx, removing evalue. Will need to join with tables later.
```{r}
library(tidyverse)
```
Bringing in the list of 46753 CDS IDs
```{r}
# Load the CDS ID list. The file is read without a header row, so read.csv
# assigns default column names (V1, ...); V1 is the key used by the joins
# further down.
cdslist <- read.csv(
  file = "../analyses/GCF_000297895_cdslist.tab",
  header = FALSE
)
```
```{r}
# Read the tab-separated Cg-blastx-sp table (no header row, so columns are
# V1, V2, ...) and keep only the first row for each value of V1, retaining
# all other columns of that row.
uniprot <- distinct(
  read.csv("../analyses/Cg-blastx-sp.tab.txt", header = FALSE, sep = "\t"),
  V1,
  .keep_all = TRUE
)
```
Spur blastx with gene symbols
```{r}
# Spur blastx table with gene symbols. This file is read with its header row,
# so its own column names are kept (the join further down matches on its
# V1.x column).
spurLOC <- read.csv(
  file = "../analyses/Spur-blastx-LOCsym.tab",
  header = TRUE,
  sep = "\t"
)
```
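Drosophila: the blastn command below is kept for reference only; it sits in a plain (non-`{r}`) fence, so it is not run when this document is knit.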
```
!{bldir}blastn \
-task blastn \
-query ../data/GCF_000297895.1_oyster_v9_cds_from_genomic.fna \
-db ../blastdb/dmel-all-gene-r6.37 \
-out ../analyses/Cg-dmel-gene_blastn_02.tab \
-evalue 1E-05 \
-num_threads 1 \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt "6 qaccver saccver evalue"
```
```{r}
# Load the table at Cg-dmel-gene_blastn_02.tab (tab-separated, no header row)
# and collapse it to one row per V1, keeping the first row seen.
# NOTE(review): if this file was produced by the blastn command shown above
# (outfmt "6 qaccver saccver evalue"), then presumably V1 = query, V2 = subject
# and V3 = e-value -- confirm.
dmel <- "../analyses/Cg-dmel-gene_blastn_02.tab" %>%
  read.csv(header = FALSE, sep = "\t") %>%
  distinct(V1, .keep_all = TRUE)
```
Celg Gene symbols
```{r}
# Gene-symbol table from Cg_Ce-genesym.tab: tab-separated and read without a
# header row, so columns get the default names V1, V2, ... (the join further
# down keys on this table's V2 column).
celsym <- read.csv(
  file = "../analyses/Cg_Ce-genesym.tab",
  header = FALSE,
  sep = "\t"
)
```
JOIN
```{r}
# Build the combined annotation table: start from the CDS list and left-join
# each annotation source onto it, so every row of cdslist is kept even when a
# source has no match for it.
# Names such as V2.x.x or V3.y.y come from dplyr's .x/.y de-duplication
# suffixes (and from headers already present in the input files), so they
# depend on the order of the joins -- do not reorder the joins without
# re-checking the column names selected below.
comb <- cdslist %>%
  left_join(uniprot, by = "V1") %>%
  left_join(spurLOC, by = c("V1" = "V1.x")) %>%
  left_join(dmel, by = "V1") %>%
  left_join(celsym, by = c("V1" = "V2")) %>%
  # Keep only these columns, in this order, giving each a descriptive name.
  select(
    cdsID = V1,
    uniprot = V2.x,
    u_ev = V3.x,
    spurAcc = V2.x.x,
    s_ev = V3.x.x,
    spID02 = V1.y,
    sp_ecb = V3.y,
    sp_sym = V4.x,
    dmel = V2.y.y,
    dmel_ev = V3.y.y,
    cel_ID = V1.y.y,
    cel_ev = V3,
    cel_sym = V4.y
  )
```
# Going back to Giles Blast
../data/bestblast_/bestblast.tab
```{r}
# Read the bestblast table (tab-separated, with a header row) and keep only
# the ID, HitAcc, HitDesc and E.Value columns.
giles <- select(
  read.csv("../data/bestblast_/bestblast.tab", header = TRUE, sep = "\t"),
  ID, HitAcc, HitDesc, E.Value
)
```
```{r}
# Attach the bestblast hit columns to the combined table, matching comb's
# cdsID against giles' ID; all rows of comb are kept.
comb_g <- comb %>%
  left_join(giles, by = c("cdsID" = "ID"))
```
```{r}
# Key table from LOC_seqID_key.tab: tab-separated and read without a header
# row, so its columns get the default names V1, V2, ...
loc <- read.csv(
  file = "../data/bestblast_/LOC_seqID_key.tab",
  header = FALSE,
  sep = "\t"
)
```
```{r}
# Keep every row of the key table and add the annotation columns wherever
# the key table's V1 matches a cdsID in comb_g.
loc_dt <- loc %>%
  left_join(comb_g, by = c("V1" = "cdsID"))
```
```{r}
# Save the joined annotation table as a tab-separated file.
write_tsv(loc_dt, "../analyses/Cg-annot-join-V1.3.tab")
```
Joining marker file
```{r}
# Marker table (tab-separated, with a header row); its gene_id column is the
# key used for the annotation join further down.
top <- read.csv(
  file = "../data/topmarker_bla_mitov7_top10perCluster.txt",
  header = TRUE,
  sep = "\t"
)
```
```{r}
# Reload the joined annotation table from disk (the same path that loc_dt is
# written to earlier in this document); it is tab-separated with a header row.
annot <- read.csv(
  file = "../analyses/Cg-annot-join-V1.3.tab",
  header = TRUE,
  sep = "\t"
)
```
```{r}
library(tidyverse)
```
```{r}
# Annotate the marker table: match each marker's gene_id against column V2 of
# the annotation table, keeping all marker rows.
top.annotv1.3 <- top %>%
  left_join(annot, by = c("gene_id" = "V2"))
```
```{r}
# Save the annotated marker table as a tab-separated file.
write_tsv(top.annotv1.3, "../analyses/top.annotv1.3.tab")
```