---
title: "Identification of differentially expressed isoforms using Ballgown in _S.namaysuch_ non-parasitized/parasitized liver tissue in two different subspecies: lean and siscowet."
author: "Sam White"
date: "08/17/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Use [Ballgown](https://github.com/alyssafrazee/ballgown) for identification of differentially expressed isoforms in _S.namaysuch_ non-parasitized/parasitized liver tissue in two different subspecies: lean and siscowet.

REQUIRES Linux-based system to run all chunks properly; some chunks will not work on Mac OS!

REQUIRES the following Bash programs:

- `wget`

- `tree`

- `md5sum`

REQUIRES the following R libraries:

- [`Ballgown`](https://github.com/alyssafrazee/ballgown) (Bioconductor)

- `tidyverse`

## Load `R` libraries

```{r}
library("ballgown")
library("tidyverse")
library("ggplot2")
library("Rfast")
```


## Set user variables!
```{r set-pval-qval-cutoffs}
# Set maximum pvalue for isoform expression cutoff
pvalue <- 0.05
# Set maximum qvalue (false discovery rate) for isoform expression cutoff
qvalue <- 0.05
```

## Download Ballgown input files.

Notebooks detailing their creation:

- [FastQ trimming](https://robertslab.github.io/sams-notebook/2022/07/06/SRA-Data-S.namaycush-SRA-BioProject-PRJNA674328-Download-and-QC.html)

- [Genome indexing, and exon/splice sites with HISAT2](https://robertslab.github.io/sams-notebook/2022/08/10/Splice-Site-Identification-S.namaycush-Liver-Parasitized-and-Non-Parasitized-SRA-RNAseq-Using-Hisat2-Stingtie-with-Genome-GCF_016432855.1.html)

- [Mapping and identificaion of isoforms with StingTie](https://robertslab.github.io/sams-notebook/2022/08/10/Splice-Site-Identification-S.namaycush-Liver-Parasitized-and-Non-Parasitized-SRA-RNAseq-Using-Hisat2-Stingtie-with-Genome-GCF_016432855.1.html)

```{bash download-bgown-input-tables}
# Download Ballgown input files and directory structure
wget \
--directory-prefix ../data/ballgown \
--recursive \
--no-check-certificate \
--continue \
--cut-dirs 2 \
--no-host-directories \
--no-parent \
--quiet \
--reject "concatenated-fastq-checksums.md5,original-fastq-checksums.md5" \
--reject-regex "/multiqc_data/" \
--accept "*.ctab,*checksums.md5" https://gannet.fish.washington.edu/Atumefaciens/20220810-snam-hisat2-GCF_016432855.1_index-align-stringtie_isoforms/
```

```{bash cleanup}
rm ../data/ballgown/checksums.md5
```


## Verify checksums

NOTE: Warnings are expected, as the checksums files have checksums for files that are not downloaded for this project.
```{bash}
cd ../data/ballgown

pwd
# Make a line
line="-----------------------------------------------------------------------------------------------"
# Set working directory
wd=$(pwd)
# Loop through directories and verify checksums
for directory in */
do
  cd "${directory}"
  # Get sample name; strips trailing slash from directory name
  sample="${directory%/}"
  
  echo ${line}
  echo "${sample}"
  echo ""
  
  # Confirm checksums; sorts for easier reading
  md5sum --check "${sample}"-checksums.md5 | sort -V
  echo ""
  echo "${line}"
  echo ""
  cd ${wd}
done
# Show downloaded directories/files
tree
```

## Find Ballgown installation location
```{r set-ballgown-install-dir}
data_directory <-  system.file('extdata', package='ballgown') # automatically finds ballgown's installation directory
# examine data_directory:
data_directory
```

## Read in _S.namaycush_ genes BED file
```{r read-in-genes-BED}
genes_BED <- read.table(file = "../data/20220818-snam-GCF_016432855.1_SaNama_1.0_genes.bed")
# Add BED column names for more clarity
colnames(genes_BED) <- c("chr", "start", "end", "name", "score", "strand")
head(genes_BED)
```
## Create Ballgown object
```{r create-ballgown-objects}
# Uses regular expression in samplePattern to find all pertinent folders
# Load all measurement data
bg <- ballgown(dataDir="../data/ballgown/", samplePattern='[NP]*', meas='all')
bg
bg.nonparasitized <- ballgown(dataDir="../data/ballgown/", samplePattern='NP*', meas='all')
bg.nonparasitized

bg.nonparasitized.lean <- ballgown(dataDir="../data/ballgown/", samplePattern='NPLL*', meas='all')
bg.nonparasitized.lean

bg.nonparasitized.siscowet <- ballgown(dataDir="../data/ballgown/", samplePattern='NPSL*', meas='all')
bg.nonparasitized.siscowet

bg.parasitized <- ballgown(dataDir="../data/ballgown/", samplePattern='P*', meas='all')
bg.parasitized

bg.parasitized.lean <- ballgown(dataDir="../data/ballgown/", samplePattern='PLL*', meas='all')
bg.parasitized.lean

bg.parasitized.siscowet <- ballgown(dataDir="../data/ballgown/", samplePattern='PSL*', meas='all')
bg.parasitized.siscowet
```

## Download and filter metadata file

Filtered metadata will be used to create a phenotype dataframe needed for Ballgown differential expression analysis.

Will use the 'strain` column.

`strain` column explanation:

lean parasitized = 1
lean nonparasitized = 2
siscowet parasitized = 3
siscowet nonparasitized = 4

```{r create-dataframes-for-ballgwon-pData}
# Read in metadata file from URL
# Data is alreayd properly sorted (by library) to match directory structure - required by Ballgown.
sample_metadata_full <- read.csv("https://raw.githubusercontent.com/RobertsLab/project-lake-trout/main/data/ballgown-metadata.csv")

# View full metadata
head(sample_metadata_full)

# Create modified metadata for LEAN only
# 1 = LEAN PARASTIZED
# 2 = LEAN NONPARASITIZED
sample_metadata_subset_lean_only <- sample_metadata_full %>% filter(strain == "1" | strain == "2")

# Test to make sure expected number of rows (does NOT count header row)
# Expect 12 rows; 6 samples from each grouping
stopifnot(12 == nrow(sample_metadata_subset_lean_only))
head(sample_metadata_subset_lean_only)


# Create modified metadata for SISCOWET only
# 3 = SISCOWET PARASITIZED
# 4 = SISCOWET NONPARASITIZED
sample_metadata_subset_siscowet_only <- sample_metadata_full %>% filter(strain == "3" | strain == "4")

# Test to make sure expected number of rows (does NOT count header row)
# Expect 12 rows; 6 samples from each grouping
stopifnot(12 == nrow(sample_metadata_subset_siscowet_only))
head(sample_metadata_subset_siscowet_only)

# Create modified metadata for PARASITIZED (leand and siscowet combined)
# 1 = LEAN PARASTIZED
# 3 = SISCOWET PARASITIZED
sample_metadata_subset_parasitized <- sample_metadata_full %>% filter(strain == "1" | strain == "3")

# Test to make sure expected number of rows (does NOT count header row)
# Expect 12 rows; 6 samples from each grouping
stopifnot(12 == nrow(sample_metadata_subset_parasitized))
head(sample_metadata_subset_parasitized)


# Create modified metadata for NONPARASITIZED (leand and siscowet combined)
# 1 = LEAN NONPARASTIZED
# 4 = SISCOWET NONPARASITIZED
sample_metadata_subset_nonparasitized <- sample_metadata_full %>% filter(strain == "3" | strain == "4")

# Test to make sure expected number of rows (does NOT count header row)
# Expect 12 rows; 6 samples from each grouping
stopifnot(12 == nrow(sample_metadata_subset_nonparasitized))
head(sample_metadata_subset_nonparasitized)

```


## Load phenotype dataframe into Ballgown object
```{r load-phenotype-full}
# Load phenotype info into Ballgown
pData(bg) <- sample_metadata_full

# Examine phenotype data as it exists in Ballgown
phenotype_table <-  pData(bg)
head(phenotype_table)
```

## Look at exon info
```{r all-exon-info}
# Exon info
structure(bg)$exon
```

## Look at intron info
```{r all-intron-info}
# Intron info
structure(bg)$intron
```

## Look at transcript (isoform) info
```{r all-transcript-info}
# Transcript info
structure(bg)$trans
```


## Load all transcript expression data
```{r load-all-transcript-expression}
# Expression data
whole_tx_table <-  texpr(bg, 'all')

# Rename gene_names listed as a "."
whole_tx_table <- whole_tx_table %>% mutate(gene_name = ifelse(gene_name == ".", t_name, gene_name))
head(whole_tx_table)

# FPKM values for all transcripts
# Converts output to data frame
transcript_fpkm <- as.data.frame(texpr(bg, 'FPKM'))

# Put rownames into column named "t_id" for further manipulation
transcript_fpkm <- rownames_to_column(transcript_fpkm, "t_id")
head(transcript_fpkm)
```


## Load all gene expression data
```{r load-gene-expression-data}
whole_gx_table <-  gexpr(bg)
head(whole_gx_table)
```

```{r write-all-fpkm-to-file}
write.csv(transcript_fpkm,
          file = "../data/fpkm-all_transcripts.csv",
          quote = FALSE,
          row.names = FALSE)
```

```{r write-tx-table-to-file}
# Write whole_tx_table to file
write.csv(whole_tx_table,
          file ="../data/whole_tx_table.csv",
          quote = FALSE,
          row.names = FALSE)
```


## Generate boxplots to compare FPKM across all samples
```{r boxplots-to-compare-FPKM-across-all-samples} 
# Load phenotype info into Ballgown
pData(bg) <- sample_metadata_full

# Pull all transcript expression values stored in FPKM measurement from ballgown object
fpkm <- texpr(bg, meas = "FPKM")

# Log transform data and add 1 to all values to artificially prevent log10(0).
fpkm_df <- as.data.frame(log10(fpkm+1))
head(fpkm_df)

# Rotate data frame
# Creates a "subspecies" column by examining library name length
# Parses out subspecies "L" or "S" depending on length of library name
# L = lean
# S = siscowet
# Then, changes abbreviated subspecies to full, corresponding name
fpkm_df_pivot <- pivot_longer(
  fpkm_df, cols = starts_with("FPKM"), 
  names_to = "library") %>% 
  mutate(subspecies = case_when(
    nchar(library) == 11 ~ str_sub(library, 8, 8),
    nchar(library) == 10 ~ str_sub(library, 7, 7)
    )
  ) %>% 
  mutate(subspecies = replace(subspecies, subspecies == "L", "lean")) %>% 
  mutate(subspecies = replace(subspecies, subspecies == "S", "siscowet"))

head(fpkm_df_pivot)

# Sort data frame by subspecies
fpkm_df_pivot_sorted <- fpkm_df_pivot %>% arrange(subspecies)
head(fpkm_df_pivot_sorted)

# Set unique library names as vector
# Will be used to group boxplot by subspecies
fpkm_libraries_sorted_unique <- (unique(fpkm_df_pivot_sorted$library))
head(fpkm_libraries_sorted_unique)

# Re-order data frame by subspecies-sorted data
fpkm_df_pivot$library <- factor(fpkm_df_pivot$library, levels = fpkm_libraries_sorted_unique)

# Produce boxplots of FPKM for each library
# Grouped by subspecies
ggplot(fpkm_df_pivot, aes(library, value, color = subspecies)) +
  geom_boxplot() +
  ggtitle("Comparisons of transcript FPKM values across all libraries") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ylab("FPKM (log10+1 transformed") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
```

```{r save-boxplot-all-fpkm}
# Save boxplot as PDF
ggsave(filename = "../figures/fpkm-lean-v-siscowet-boxplot.pdf")
```
## Identify differentially expressed transcripts (DETs) between subspecies.
## Returns FoldChange, too (getFC = TRUE)

### THERE ARE NO SAMPLES WITH q-values <=0.05!!!
```{r DETs-lean-vs.-siscowet}
# Set string describing comparison
# Used for final printout in chunk
comparison <- "lean vs. siscowet"

# Load phenotype info into Ballgown
pData(bg) <- sample_metadata_full

# Identify DETs
DET_subspecies_stat_results = stattest(bg, feature='transcript', meas='FPKM', covariate="subspecies", getFC = TRUE)
head(DET_subspecies_stat_results)

# Filter based on p-value and q-value
DET_subspecies_filtered_stat_results <- filter(DET_subspecies_stat_results, pval <= pvalue & qval <= qvalue)
head(DET_subspecies_filtered_stat_results)
```


## Identify differentially expressed transcripts (DETs) between subspecies.
## Returns FoldChange, too (getFC = TRUE)
## Restricted to p-value filtering ONLY. q-values are all >=0.05!!
```{r DETs-lean-vs-siscowet-pval-only}
# Filter based on p-value only
DET_subspecies_pval_filtered_stat_results <- filter(DET_subspecies_stat_results, pval <= pvalue)
head(DET_subspecies_pval_filtered_stat_results)

# Merge with full table to get subset of just differentially expressed transcripts
merged_DET_subspecies_pval_filtered <- merge(x = DET_subspecies_pval_filtered_stat_results, y = whole_tx_table, by.x = "id", by.y = "t_id")
head(merged_DET_subspecies_pval_filtered)

# Convert to BED coordinate system, which is 0-based start
# and end EXCLUSIVE; so need to subtract 1 from start coordinates ONLY
merged_DET_subspecies_pval_filtered_start_minus_1 <- merged_DET_subspecies_pval_filtered %>% mutate(start = start - 1)
head(merged_DET_subspecies_pval_filtered_start_minus_1)

# Filter for lean up-regulated transcripts (i.e. fold-change < 1)
# Convert to BED coordinate system, which is 0-based start
# and end EXCLUSIVE; so need to subtract 1 from start coordinates ONLY
DET_subspecies_pval_filtered_lean <- filter(merged_DET_subspecies_pval_filtered, fc < 1) %>% mutate(start = start - 1)
head(DET_subspecies_pval_filtered_lean)

# Filter for siscowet up-regulated transcripts (i.e. fold-change > 1)
# Convert to BED coordinate system, which is 0-based start
# and end EXCLUSIVE; so need to subtract 1 from start coordinates ONLY
DET_subspecies_pval_filtered_siscowet <- filter(merged_DET_subspecies_pval_filtered, fc > 1) %>% mutate(start = start - 1)
head(DET_subspecies_pval_filtered_siscowet)

# Count number of DET.
count_DET_subspecies_pval_filtered_stat_results <-  nrow(DET_subspecies_pval_filtered_stat_results)

# Count number of lean DET.
count_DET_subspecies_pval_filtered_lean <-  nrow(DET_subspecies_pval_filtered_lean)

# Count number of siscowet DET.
count_DET_subspecies_pval_filtered_siscowet <-  nrow(DET_subspecies_pval_filtered_siscowet)

# Check to make sure total number of DEGs matches sum of each treatment compared DEGs.
# Stops script if counts do not sum to total DEGs.
stopifnot(count_DET_subspecies_pval_filtered_stat_results == 
            sum(count_DET_subspecies_pval_filtered_lean, 
                count_DET_subspecies_pval_filtered_siscowet
                )
          )


# Print number of DET
cat("Number of", comparison, "DET with p-values <= ", pvalue, ":", count_DET_subspecies_pval_filtered_stat_results, "\n")

# Print number of lean DET
cat("Number of lean DET with p-values <= ", pvalue, ":", count_DET_subspecies_pval_filtered_lean, "\n")

# Print number of siscowet DET
cat("Number of siscowet DET with p-values <= ", pvalue, ":", count_DET_subspecies_pval_filtered_siscowet, "\n")
```


```{r write-DETs-lean-vs-siscowet-to-files}
# Write merged dataframe to CSV
write.csv(
  merged_DET_subspecies_pval_filtered, 
  file = "../analyses/DET-subspecies-pval_filtered_p-0.05.csv", 
  quote = FALSE, 
  row.names = FALSE)

# Write pval/qval filtered dataframe to CSV
write.csv(
  DET_subspecies_filtered_stat_results,
  file = "../analyses/DET-subspecies-pval_qval_filtered_p-q-0.05.csv",
  quote = FALSE,
  row.names = FALSE
)


# Creates a BED file of all DETs and inserts necessary columns to create properly formatted BED file. 
write.table(
  (
    merged_DET_subspecies_pval_filtered_start_minus_1 %>% 
    select(chr, start, end, t_name, strand) %>% 
      add_column(
        score = "0", # Inserts a "score" column and assigns a value of "0" to all rows.
        .before = "strand"
        )
    ), 
  file = "../analyses/DET-subspecies-pval_filtered_p-0.05.bed",
  sep = "\t",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE
)

# Creates a BED file of only lean DETs and inserts necessary columns to create properly formatted BED file. 
write.table(
  (
    DET_subspecies_pval_filtered_lean %>% 
    select(chr, start, end, t_name, strand) %>% 
      add_column(
        score = "0", # Inserts a "score" column and assigns a value of "0" to all rows.
        .before = "strand"
        )
    ), 
  file = "../analyses/DET-subspecies-lean-pval_filtered_p-0.05.bed",
  sep = "\t",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE
)

# Creates a BED file of only siscowet DETs and inserts necessary columns to create properly formatted BED file. 
write.table(
  (
    DET_subspecies_pval_filtered_siscowet %>% 
    select(chr, start, end, t_name, strand) %>% 
      add_column(
        score = "0", # Inserts a "score" column and assigns a value of "0" to all rows.
        .before = "strand"
        )
    ), 
  file = "../analyses/DET-subspecies-siscowet-pval_filtered_p-0.05.bed",
  sep = "\t",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE
)
```


## Identify differentially expressed GENES (DEGs) between subspecies.
## Returns FoldChange, too (getFC = TRUE)

### THERE ARE NO SAMPLES WITH q-values <=0.05!!!
```{r DEGs-lean-vs.-siscowet}
# Set string describing comparison
# Used for final printout in chunk
comparison <- "lean vs. siscowet"

# Load phenotype info into Ballgown
pData(bg) <- sample_metadata_full

# Identify DEGs
DEG_subspecies_stat_results = stattest(bg, feature='gene', meas='FPKM', covariate="subspecies", getFC = TRUE)
head(DEG_subspecies_stat_results)

# Filter based on p-value and q-value
DEG_subspecies_filtered_stat_results <- filter(DEG_subspecies_stat_results, pval <= pvalue & qval <= qvalue)
head(DEG_subspecies_filtered_stat_results)

# Count number of DEG.
count_DEG_subspecies_filtered_stat_results <-  nrow(DEG_subspecies_filtered_stat_results)

# Print number of DEG
cat("Number of", comparison, "DEG with p-values <=", pvalue, "&", "q-values <=", qvalue, ":", count_DEG_subspecies_filtered_stat_results, "\n")
```

## Identify differentially expressed GENES (DEGs) between subspecies.
## Returns FoldChange, too (getFC = TRUE)
## Restricted to p-value filtering ONLY. q-values are all >=0.05!!
```{r DEGs-lean-vs-siscowet-pval-only}
# Filter based on p-value only
DEG_subspecies_pval_filtered_stat_results <- filter(DEG_subspecies_stat_results, pval <= pvalue)
head(DEG_subspecies_pval_filtered_stat_results)

# Merge with full table to get subset of just differentially expressed GENES
merged_DEG_subspecies_pval_filtered <- merge(x = DEG_subspecies_pval_filtered_stat_results, y = whole_tx_table, by.x = "id", by.y = "gene_id")
head(merged_DEG_subspecies_pval_filtered)

# Merge with BED file to get DEGs WITHOUT associated transcripts
merged_DEG_subspecies_pval_filtered_genes_only <- merge(x = DEG_subspecies_pval_filtered_stat_results, y = genes_BED, by.x = "id", by.y = "name")
head(merged_DEG_subspecies_pval_filtered_genes_only)

# Convert to BED coordinate system, which is 0-based start
# and end EXCLUSIVE; so need to subtract 1 from start coordinates ONLY
merged_DEG_subspecies_pval_filtered_genes_only <- merged_DEG_subspecies_pval_filtered_genes_only %>% mutate(start = start - 1)
head(merged_DEG_subspecies_pval_filtered_genes_only)

# Filter for lean up-regulated transcripts (i.e. fold-change < 1)
# Convert to BED coordinate system, which is 0-based start
# and end EXCLUSIVE; so need to subtract 1 from start coordinates ONLY
DEG_subspecies_pval_filtered_genes_only_lean <- filter(merged_DEG_subspecies_pval_filtered_genes_only, fc < 1) %>% mutate(start = start - 1)
head(DEG_subspecies_pval_filtered_genes_only_lean)

# Filter for siscowet up-regulated transcripts (i.e. fold-change > 1)
# Convert to BED coordinate system, which is 0-based start
# and end EXCLUSIVE; so need to subtract 1 from start coordinates ONLY
DEG_subspecies_pval_filtered_genes_only_siscowet <- filter(merged_DEG_subspecies_pval_filtered_genes_only, fc > 1) %>% mutate(start = start - 1)
head(DEG_subspecies_pval_filtered_genes_only_siscowet)

# Count number of DEG.
count_DEG_subspecies_pval_filtered_stat_genes_only <-  nrow(merged_DEG_subspecies_pval_filtered_genes_only)


# Count number of lean DEG.
count_DEG_subspecies_pval_filtered_genes_only_lean<-  nrow(DEG_subspecies_pval_filtered_genes_only_lean)


# Count number of siscowet DEG.
count_DEG_subspecies_pval_filtered_genes_only_siscowet <-  nrow(DEG_subspecies_pval_filtered_genes_only_siscowet)

# Check to make sure total number of DEGs matches sum of each treatment compared DEGs.
# Stops script if counts do not sum to total DEGs.
stopifnot(count_DEG_subspecies_pval_filtered_stat_genes_only == 
            sum(count_DEG_subspecies_pval_filtered_genes_only_lean, 
                count_DEG_subspecies_pval_filtered_genes_only_siscowet
                )
          )

# Print number of DEG
cat("Number of", comparison, "DEG with p-values <= ", pvalue, ":", count_DEG_subspecies_pval_filtered_stat_genes_only, "\n")

# Print number of lean DEG
cat("Number of lean DEG with p-values <= ", pvalue, ":", count_DEG_subspecies_pval_filtered_genes_only_lean, "\n")

# Print number of siscowet DEG
cat("Number of siscowet DEG with p-values <= ", pvalue, ":", count_DEG_subspecies_pval_filtered_genes_only_siscowet, "\n")
```

```{r write-DEGs-lean-vs-siscowet-to-files}
# Write merged dataframe to CSV
write.csv(
  merged_DEG_subspecies_pval_filtered_genes_only, 
  file = "../analyses/DEG-subspecies-pval_filtered_p-0.05.csv", 
  quote = FALSE, 
  row.names = FALSE)

# Write pval/qval filtered dataframe to CSV
write.csv(
  DEG_subspecies_filtered_stat_results,
  file = "../analyses/DEG_subspecies-pval_qval_filtered_p-q-0.05.csv",
  quote = FALSE,
  row.names = FALSE
)


# Creates a BED file of all DEGs and inserts necessary columns to create properly formatted BED file. 
write.table(
  (
    merged_DEG_subspecies_pval_filtered_genes_only %>% 
    select(chr, start, end, id, strand) %>% 
      add_column(
        score = "0", # Inserts a "score" column and assigns a value of "0" to all rows.
        .before = "strand"
        )
    ), 
  file = "../analyses/DEG-subspecies-pval_filtered_p-0.05.bed",
  sep = "\t",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE
)

# Creates a BED file of only lean DEGs and inserts necessary columns to create properly formatted BED file. 
write.table(
  (
    DEG_subspecies_pval_filtered_lean %>% 
    select(chr, start, end, id, strand) %>% 
      add_column(
        score = "0", # Inserts a "score" column and assigns a value of "0" to all rows.
        .before = "strand"
        )
    ), 
  file = "../analyses/DEG-subspecies-lean-pval_filtered_p-0.05.bed",
  sep = "\t",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE
)

# Creates a BED file of only siscowet DEGs and inserts necessary columns to create properly formatted BED file. 
write.table(
  (
    DEG_subspecies_pval_filtered_siscowet %>% 
    select(chr, start, end, id, strand) %>% 
      add_column(
        score = "0", # Inserts a "score" column and assigns a value of "0" to all rows.
        .before = "strand"
        )
    ), 
  file = "../analyses/DEG-subspecies-siscowet-pval_filtered_p-0.05.bed",
  sep = "\t",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE
)
```