--- title: "08-homology" author: "Steven Roberts" date: "2023-12-29" output: github_document --- ```{r setup, include=FALSE} library(knitr) library(tidyverse) library(kableExtra) library(DT) library(Biostrings) library(tm) knitr::opts_chunk$set( echo = TRUE, # Display code chunks eval = FALSE, # Evaluate code chunks warning = FALSE, # Hide warnings message = FALSE, # Hide messages fig.width = 6, # Set plot width in inches fig.height = 4, # Set plot height in inches fig.align = "center", # Align plots to the center comment = "" # Prevents appending '##' to beginning of lines in code output ) ``` ```{r, engine='bash', eval=TRUE} grep ">" -c ../../D-Apul/output/05.33-lncRNA-discovery/Apul_lncRNA.fasta head -2 ../../D-Apul/output/05.33-lncRNA-discovery/Apul_lncRNA.fasta ``` ```{r, engine='bash', eval=TRUE} grep ">" -c ../../E-Peve/output/05-lncRNA-discovery/Peve_lncRNA.fasta head -2 ../../E-Peve/output/05-lncRNA-discovery/Peve_lncRNA.fasta ``` ```{r, engine='bash', eval=TRUE} grep ">" -c ../../E-Peve/output/Peve_lncRNA.fasta head -2 ../../E-Peve/output/Peve_lncRNA.fasta ``` ```{r, engine='bash', eval=TRUE} grep ">" -c ../../F-Pmea/output/02-lncRNA-discovery/Pmea_lncRNA.fasta head -2 ../../F-Pmea/output/02-lncRNA-discovery/Pmea_lncRNA.fasta ``` ```{r, engine='bash'} cat \ ../../D-Apul/output/05.33-lncRNA-discovery/Apul_lncRNA.fasta \ ../../E-Peve/output/05-lncRNA-discovery/Peve_lncRNA.fasta \ ../../F-Pmea/output/02-lncRNA-discovery/Pmea_lncRNA.fasta \ > ../output/09-homology/merged.fasta ``` ```{r, engine='bash', eval=TRUE} grep ">" -c ../output/09-homology/merged.fasta head -4 ../output/09-homology/merged.fasta ``` ```{r, engine='bash', eval=TRUE} wc -l ../output/08-comparative-BLASTs/*tab head -2 ../output/08-comparative-BLASTs/*tab ``` Joining table in R. ```{r, engine='bash'} perl -e '$count=0; $len=0; while(<>) {s/\r?\n//; s/\t/ /g; if (s/^>//) { if ($. != 1) {print "\n"} s/ |$/\t/; $count++; $_ .= "\t";} else {s/ //g; $len += length($_)} print $_;} print "\n"; warn "\nConverted $count FASTA records in $. lines to tabular format\nTotal sequence length: $len\n\n";' \ ../output/09-homology/merged.fasta > ../output/09-homology/merged.tab ``` ```{r, eval=TRUE} query <- read.csv("../output/09-homology/merged.tab", sep = '\t', header = FALSE, row.names=NULL) ``` ```{r, eval=TRUE} apul <- read.csv("../output/08-comparative-BLASTs/Apul.tab", sep = '\t', header = FALSE, row.names=NULL) ``` ```{r, eval=TRUE} peve <- read.csv("../output/08-comparative-BLASTs/Peve.tab", sep = '\t', header = FALSE, row.names=NULL) ``` ```{r, eval=TRUE} pmea <- read.csv("../output/08-comparative-BLASTs/Pmea.tab", sep = '\t', header = FALSE, row.names=NULL) ``` ```{r, eval=TRUE, cache=TRUE} comp <- left_join(query, apul, by = "V1") %>% left_join(peve, by = "V1") %>% left_join(pmea, by = "V1") %>% select(V1, apul_hit = V2.y, apul_evalue = V11.x, peve_hit = V2.x.x, peve_evalue = V11.y, pmea_hit = V2.y.y, pmea_evalue = V11) %>% rowwise() %>% mutate(Hits = sum(!is.na(c_across(c(apul_hit, peve_hit, pmea_hit))))) ``` ```{r} datatable(comp) ``` ```{r, eval=TRUE} count_table <- table(comp$Hits) print(count_table) ``` Based on this 1967 sequences are found in all species and 5216 sequences are present in 2 species It is worth doing some taxonomy comparisons to see relatedness of species,etc