---
title: "14.1-joining-annotations"
output: html_document
---

```{r}
library(tidyverse)
```

Bringing in the list of 46753 CDS IDs

```{r}
# Comma-separated file without a header row, so columns are named V1, V2, ...
cdslist <- read.csv(
  "../analyses/GCF_000297895_cdslist.tab",
  header = FALSE
)
```

```{r}
# Tab-separated blastx table without a header row.
# distinct() keeps only the first row found for each value of V1.
uniprot <- read.delim(
  "../analyses/Cg-blastx-sp.tab.txt",
  header = FALSE
) %>%
  distinct(V1, .keep_all = TRUE)
```

Spur blastx with gene symbols

```{r}
# This table carries its own header row; the join further down keys on its
# V1.x column.
spurLOC <- read.delim(
  "../analyses/Spur-blastx-LOCsym.tab",
  header = TRUE
)
```

Drosophila

```
!{bldir}blastn \
-task blastn \
-query ../data/GCF_000297895.1_oyster_v9_cds_from_genomic.fna \
-db ../blastdb/dmel-all-gene-r6.37 \
-out ../analyses/Cg-dmel-gene_blastn_02.tab \
-evalue 1E-05 \
-num_threads 1 \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt "6 qaccver saccver evalue"
```

```{r}
# Reads the file named by -out in the blastn command above
# (-outfmt columns: qaccver, saccver, evalue); first row per V1 is kept.
dmel <- read.delim(
  "../analyses/Cg-dmel-gene_blastn_02.tab",
  header = FALSE
) %>%
  distinct(V1, .keep_all = TRUE)
```

Celg Gene symbols

```{r}
celsym <- read.delim(
  "../analyses/Cg_Ce-genesym.tab",
  header = FALSE
)
```

JOIN

```{r}
# Left-join every annotation table onto the CDS list, in this exact order.
# The .x / .y / .x.x / .y.y endings are the suffixes left_join() appends when
# both sides share a column name, so they depend on the join order.
# NOTE(review): the new names suggest which table each column came from
# (uniprot, Spur, dmel, cel) -- confirm against the joined column names.
comb <- cdslist %>%
  left_join(uniprot, by = "V1") %>%
  left_join(spurLOC, by = c("V1" = "V1.x")) %>%
  left_join(dmel, by = "V1") %>%
  left_join(celsym, by = c("V1" = "V2")) %>%
  select(
    cdsID = V1,
    uniprot = V2.x,
    u_ev = V3.x,
    spurAcc = V2.x.x,
    s_ev = V11,
    spID02 = V1.y,
    sp_ecb = V3.y,
    sp_sym = V4.x,
    dmel = V2.y.y,
    dmel_ev = V3.x.x,
    cel_ID = V1.y.y,
    cel_ev = V3.y.y,
    cel_sym = V4.y
  )
```

# Going back to Giles Blast

../data/bestblast_/bestblast.tab

```{r}
# Only the ID, hit accession, hit description and e-value columns are kept.
giles <- read.delim(
  "../data/bestblast_/bestblast.tab",
  header = TRUE
) %>%
  select(ID, HitAcc, HitDesc, E.Value)
```

```{r}
comb_g <- comb %>%
  left_join(giles, by = c("cdsID" = "ID"))
```

```{r}
loc <- read.delim(
  "../data/bestblast_/LOC_seqID_key.tab",
  header = FALSE
)
```

```{r}
# The key table is the left side, so every row of loc is retained.
loc_dt <- loc %>%
  left_join(comb_g, by = c("V1" = "cdsID"))
```

```{r}
write_tsv(loc_dt, "../analyses/Cg-annot-join-V1.2.tab")
```

Joining
Marker file

```{r}
# Tab-separated marker table with a header row; the join below keys on its
# gene_id column.
top <- read.delim(
  "../data/topmarker_bla_mitov7_top10perCluster.txt",
  header = TRUE
)
```

```{r}
# Annotation table read back from disk; the join below keys on its V2 column.
annot <- read.delim(
  "../analyses/Cg-annot-join-V1.2.tab",
  header = TRUE
)
```

```{r}
library(tidyverse)
```

```{r}
# Every marker row is kept; annotation columns are attached where
# gene_id matches V2.
top.annotv1.2 <- top %>%
  left_join(annot, by = c("gene_id" = "V2"))
```

```{r}
write_tsv(top.annotv1.2, "../analyses/top.annotv1.2.tab")
```