---
title: "18S Yellow Island eDNA using the phyloseq pipeline after reviewing the qc steps using dada2"
author: "Chris"
date: "2024-09"
output: html_document
---

# 18S workflow

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Load packages

```{r}
#library(decontam)
library(devtools)
library("dada2")
library("Biostrings")
```

# Future packages - move these to wherever I need them

```{r}
library(ggplot2)
library(vegan)
library(phyloseq)
library(dplyr)
library(tibble)
library(reshape2)
library(tidyr)
```

# update installation for dada2 - only need to run this once

```{r}
#devtools::install_github("benjjneb/dada2", ref="v1.16")
# change the ref argument to get other versions
```

# interface with google drive - don't need for raven, only for personal computer

```{r}
#install.packages("googledrive")
#library(googledrive)
#drive_auth() # This will prompt you to log in and authorize R to access your Google Drive
```

# pull zip files

```{r}
#drive_find() # This lists the files available in your Google Drive
#OR
#my_file <- drive_get("file_name.csv") # Specific file sourcing
```

# Load data

## three df's are needed to work through the phyloseq pipeline - the sequences, the asv's and the metadata

```{r}
# metadata table
meta <- read.csv("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/yimetadata.csv")

# asv -- choose ONE pair of loaders below and uncomment it; the "prep asv's"
# chunk further down requires asvF and asvR to be assigned before it runs.
# FIX: the asvR fasta loader previously pointed at the _F.fasta file
# (copy-paste error); it now points at the reverse (_R) file.
#asvF <- readDNAStringSet("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/asv/18S_Euk/paired/cllmkhe960001l50fyz8mftdj-18S_Euk-paired_F.fasta", format = "fasta")
#asvR <- readDNAStringSet("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/asv/18S_Euk/paired/cllmkhe960001l50fyz8mftdj-18S_Euk-paired_R.fasta", format = "fasta")
# FIX: the asvR read.table() call below was previously left uncommented while
# its assignment (`#asvR<-`) was commented out, so it ran as a bare call that
# printed the table and assigned nothing; commenting is now consistent.
#asvF <- read.table("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/asv/18S_Euk/paired/cllmkhe960001l50fyz8mftdj-18S_Euk-paired_F.asv", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
#asvR <- read.table("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/asv/18S_Euk/paired/cllmkhe960001l50fyz8mftdj-18S_Euk-paired_R.asv", header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# taxonomy
tax18 <- read.delim("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/Tronko_Results/Yellow_Sep5_q35_18S_Max5.txt", header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE)
#y18S_30 <- read.csv("/home/shared/8TB_HDD_02/cnmntgna/GitHub/YellowIsland2023/data/Tronko_Results/Yellow_Sep5_q35_18S_Max30.txt")
```

# check fastq files

```{r}
# NOTE(review): this is a macOS path while everything else reads from the
# raven server (/home/shared/...) -- confirm which machine this chunk is for.
path <- "/Users/cmantegna/Desktop/ysequences"
list.files(path)
```

# prep asv's for merging if coming from tables/ csv's

```{r}
# Tag each table with its read direction so the long tables can be told
# apart after merging.
asvF$read_type <- "forward"
asvR$read_type <- "reverse"

# Reshape to long format (one row per ASV x sample).
# Assumes columns 1-2 are ASV id/sequence and the last column is the
# read_type just added, so the sample-count columns are 3:(ncol - 1).
# TODO(review): confirm against the actual .asv table layout.
long_asvF <- asvF %>%
  pivot_longer(cols = 3:(ncol(asvF) - 1), names_to = "sample", values_to = "abundance")
long_asvR <- asvR %>%
  pivot_longer(cols = 3:(ncol(asvR) - 1), names_to = "sample", values_to = "abundance")
```

```{r}
# Combine forward and reverse long tables.
# NOTE(review): joining only on "sample" is a many-to-many join (every
# forward ASV pairs with every reverse ASV within a sample). If the goal is
# to stack the two read directions, bind_rows() is likely intended; if it is
# a true merge, the join should also key on the ASV/sequence id -- confirm.
#asvC <- full_join(long_asvF, long_asvR, by = "sample")

# Create OTU table
# FIX: previously referenced `asv_combined`, which is never defined anywhere
# in this file -- the join above assigns to `asvC`.
# NOTE(review): assigning to `otu_table` masks phyloseq::otu_table() for the
# rest of the session; consider a different object name (e.g. otu_tab).
otu_table <- otu_table(as.matrix(asvC[, -1]), taxa_are_rows = TRUE)

# Create taxonomy table
# FIX: previously referenced `tronko_table_1`, which is never defined -- the
# TRONKO results were loaded above as `tax18`.
tax_table <- tax_table(as.matrix(tax18[, -1]))
# Note: You may need to merge or adjust your TRONKO tables similarly if they complement each other

# Create sample data
# FIX: previously referenced `metadata`, which is never defined -- the
# metadata table was loaded above as `meta`.
sample_data <- sample_data(meta)

# Optionally, create a phylogenetic tree if you have one
# tree <- read.tree("path_to_tree_file")
```