---
title: "18S Yellow Island eDNA using the phyloseq pipeline after reviewing the qc steps using dada2"
author: "Chris"
date: "2024-09"
output: html_document
---

# 18S workflow

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Load packages

```{r}
#library(decontam)
library(devtools)
library("dada2")
library("Biostrings")
```

# Future packages - move these to wherever I need them

```{r}
library(ggplot2)
library(vegan)
library(phyloseq)
library(dplyr)
library(tibble)
library(reshape2)
library(tidyr)
```

# update installation for dada2 - only need to run this once

```{r}
#devtools::install_github("benjjneb/dada2", ref="v1.16")
# change the ref argument to get other versions
```

# interface with google drive - don't need for raven, only for personal computer

```{r}
#install.packages("googledrive")
#library(googledrive)
#drive_auth() # This will prompt you to log in and authorize R to access your Google Drive
```

# pull zip files

```{r}
#drive_find() # This lists the files available in your Google Drive
#OR
#my_file <- drive_get("file_name.csv") # Specific file sourcing
```

# Load data

## three df's are needed to work through the phyloseq pipeline - the sequences, the asv's and the metadata

```{r}
# metadata table
meta <- read.csv("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/yimetadata.csv")

# asv -- choose ONE pair of loaders below and uncomment it; the "prep asv's"
# chunk further down requires asvF and asvR to be assigned before it runs.
# FIX: the asvR fasta loader previously pointed at the _F.fasta file
# (copy-paste error); it now points at the reverse (_R) file.
#asvF <- readDNAStringSet("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/asv/18S_Euk/paired/cllmkhe960001l50fyz8mftdj-18S_Euk-paired_F.fasta", format = "fasta")
#asvR <- readDNAStringSet("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/asv/18S_Euk/paired/cllmkhe960001l50fyz8mftdj-18S_Euk-paired_R.fasta", format = "fasta")
# FIX: the asvR read.table() call below was previously left uncommented while
# its assignment (`#asvR<-`) was commented out, so it ran as a bare call that
# printed the table and assigned nothing; commenting is now consistent.
#asvF <- read.table("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/asv/18S_Euk/paired/cllmkhe960001l50fyz8mftdj-18S_Euk-paired_F.asv", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
#asvR <- read.table("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/asv/18S_Euk/paired/cllmkhe960001l50fyz8mftdj-18S_Euk-paired_R.asv", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# taxonomy
tax18 <- read.delim("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/Tronko_Results/Yellow_Sep5_q35_18S_Max5.txt", header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE)
#y18S_30 <- read.csv("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/Tronko_Results/Yellow_Sep5_q35_18S_Max30.txt")
```

# check fastq files

```{r}
# NOTE(review): this is a macOS path while everything else reads from the
# raven server (/home/shared/...) -- confirm which machine this chunk is for.
path <- "/Users/cmantegna/Desktop/ysequences"
list.files(path)
```

# prep asv's for merging if coming from tables/ csv's

```{r}
# Tag each table with its read direction so the long tables can be told
# apart after merging.
asvF$read_type <- "forward"
asvR$read_type <- "reverse"

# Reshape to long format (one row per ASV x sample).
# Assumes columns 1-2 are ASV id/sequence and the last column is the
# read_type just added, so the sample-count columns are 3:(ncol - 1).
# TODO(review): confirm against the actual .asv table layout.
long_asvF <- asvF %>%
  pivot_longer(cols = 3:(ncol(asvF) - 1), names_to = "sample", values_to = "abundance")
long_asvR <- asvR %>%
  pivot_longer(cols = 3:(ncol(asvR) - 1), names_to = "sample", values_to = "abundance")
```

```{r}
# Combine forward and reverse long tables.
# NOTE(review): joining only on "sample" is a many-to-many join (every
# forward ASV pairs with every reverse ASV within a sample). If the goal is
# to stack the two read directions, bind_rows() is likely intended; if it is
# a true merge, the join should also key on the ASV/sequence id -- confirm.
#asvC <- full_join(long_asvF, long_asvR, by = "sample")

# Create OTU table
# FIX: previously referenced `asv_combined`, which is never defined anywhere
# in this file -- the join above assigns to `asvC`.
# NOTE(review): assigning to `otu_table` masks phyloseq::otu_table() for the
# rest of the session; consider a different object name (e.g. otu_tab).
otu_table <- otu_table(as.matrix(asvC[, -1]), taxa_are_rows = TRUE)

# Create taxonomy table
# FIX: previously referenced `tronko_table_1`, which is never defined -- the
# TRONKO results were loaded above as `tax18`.
tax_table <- tax_table(as.matrix(tax18[, -1]))
# Note: You may need to merge or adjust your TRONKO tables similarly if they complement each other

# Create sample data
# FIX: previously referenced `metadata`, which is never defined -- the
# metadata table was loaded above as `meta`.
sample_data <- sample_data(meta)

# Optionally, create a phylogenetic tree if you have one
# tree <- read.tree("path_to_tree_file")
```