---
title: "dada2 - taxonomy"
author: Chris
output:
  pdf_document:
    toc: yes
    toc_depth: 2
    number_sections: yes
---

```{r knitr_init, echo=FALSE, warning=FALSE, cache=FALSE}
# This setup chunk is deliberately NOT cached: it only sets options, and
# such side effects are not replayed when a chunk is restored from cache.
library(knitr)
# install.packages("rmdformats")
# library(rmdformats)
library("kableExtra")

## Global options
# cache = TRUE below keeps the other chunks from being re-evaluated each
# time the file is knit.
# NOTE(review): the original comment here said this report needs
# cache = FALSE while the options enabled caching; caching is left on as in
# the original code -- confirm which is intended. knitr does not track
# changes to external input files, so clear the cache after editing them.
options(max.print = "75")
knitr::opts_chunk$set(
  fig.width = 8,
  fig.height = 6,
  eval = TRUE,
  cache = TRUE,
  echo = TRUE,
  prompt = FALSE,
  tidy = TRUE,
  comment = NA,
  message = FALSE,
  warning = FALSE
)
opts_knit$set(width = 85)
```

# load more packages

```{r libraries, message=FALSE}
library("dada2")
library("phyloseq")
library("ggplot2") # graphics
library("readxl") # necessary to import the data from Excel file
library("dplyr") # filter and reformat data frames
library("tibble") # Needed for converting column to row names
library(readr) # read_csv() for the ASV table
library(seqinr) # read.fasta() / write.fasta() for the reference database
```

# load data

```{r load_data}
getwd()

# Single place to change the location of all input/output files
data_dir <- "/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data"

# asv from dada2 for otu creation
asv <- read_csv(file.path(data_dir, "ASVtable.csv"))
```

# read in reference files to create fasta format

```{r build_reference_fasta}
# Load your fasta sequences
fasta_seqs <- read.fasta(file.path(data_dir, "18S_Euk.fa"))

# Load the taxonomy table (assuming tab-delimited format)
taxonomy_table <- read.table(
  file.path(data_dir, "18S_Euk.tax.tsv"),
  sep = "\t",
  header = TRUE
)
stopifnot(all(c("ID", "taxonomy_path") %in% names(taxonomy_table)))

# Assuming that the fasta headers correspond to the ID column in the
# taxonomy table. match() keeps the first row when an ID is duplicated, so
# every sequence gets exactly one header.
seq_ids <- vapply(fasta_seqs, function(s) attr(s, "name"), character(1))
tax_row <- match(seq_ids, as.character(taxonomy_table$ID))

# Sequences without a taxonomy entry cannot be used as a reference; drop
# them loudly instead of writing an empty header.
has_tax <- !is.na(tax_row)
if (!all(has_tax)) {
  warning(
    sum(!has_tax), " of ", length(has_tax),
    " reference sequences have no entry in the taxonomy table and were dropped",
    call. = FALSE
  )
}
if (!any(has_tax)) {
  stop("No fasta header matched taxonomy_table$ID", call. = FALSE)
}

# Replace fasta header with taxonomy information (edit this part based on
# your table format). No leading ">" here: write.fasta() adds it itself.
# NOTE(review): dada2::assignTaxonomy() expects each header to be a
# semicolon-delimited taxonomy string; confirm taxonomy_path has that form.
new_headers <- as.character(taxonomy_table$taxonomy_path[tax_row[has_tax]])

# Write the updated fasta file. The headers are passed explicitly through
# `names`; names(fasta_seqs) would still hold the original sequence IDs.
write.fasta(
  fasta_seqs[has_tax],
  names = new_headers,
  file.out = file.path(data_dir, "18S_Euk.fasta")
)
```

# check the format

```{bash}
```

# taxonomy

```{r assign_taxonomy}
# NOTE(review): seqtab.nochim is not created anywhere in this document (only
# `asv` is loaded above, and it is never used). It must already exist in the
# session, or be rebuilt from `asv` -- confirm which is intended.
taxa <- assignTaxonomy(
  seqtab.nochim,
  file.path(data_dir, "18S_Euk.fasta"),
  multithread = TRUE,
  taxLevels = c(
    "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"
  )
)
```