---
title: "Project"
output:
html_document:
df_print: paged
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(
echo = TRUE, # Display code chunks
eval = FALSE, # Evaluate code chunks
warning = FALSE, # Hide warnings
message = FALSE, # Hide messages
fig.width = 6, # Set plot width in inches
fig.height = 4, # Set plot height in inches
fig.align = "center", # Align plots to the center
comment = "" # Prevents appending '##' to beginning of lines in code output
)
```
# call varients for hatchery
```{r, engine='bash'}
for i in {01..05}; do
/home/shared/bcftools-1.14/bcftools mpileup -Ou -f \
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.${i}.bam \
| /home/shared/bcftools-1.14/bcftools call -mv -Ov \
-o ~/Karina-chinook/output/H${i}.vcf
done
```
# call varients for wildtype
```{r, engine='bash'}
for i in {01..05}; do
/home/shared/bcftools-1.14/bcftools mpileup -Ou -f \
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.${i}.bam \
| /home/shared/bcftools-1.14/bcftools call -mv -Ov \
-o ~/Karina-chinook/output/W${i}.vcf
done
```
# Merge so there can be a comparison
```{r, engine='bash'}
for f in ~/Karina-chinook/output/*.vcf; do
/home/shared/htslib-1.14/bgzip "$f" # compress to .vcf.gz
/home/shared/htslib-1.14/tabix -p vcf "$f.gz" # index for random access
done
```
```{r, engine='bash'}
ls ~/Karina-chinook/output/*.vcf.gz > ~/Karina-chinook/output/vcflist.txt
```
```{r, engine='bash'}
/home/shared/bcftools-1.14/bcftools merge \
-l ~/Karina-chinook/output/vcflist.txt -Oz -o ~/Karina-chinook/output/merged.vcf.gz
/home/shared/bcftools-1.14/bcftools index ~/Karina-chinook/output/merged.vcf.gz
```
```{r, engine='bash', eval=TRUE}
/home/shared/bcftools-1.14/bcftools query -l ~/Karina-chinook/output/merged.vcf.gz
```
# Compare FST
Create text file that list samples
```{r, engine='bash'}
/home/shared/vcftools-0.1.16/bin/vcftools --gzvcf ~/Karina-chinook/output/merged.vcf.gz \
--weir-fst-pop hatchery.txt \
--weir-fst-pop wild.txt \
--out ~/Karina-chinook/output/hatchery_vs_wild
```
```{r, engine='bash', eval=TRUE}
head -20 ~/Karina-chinook/output/hatchery_vs_wild.weir.fst
```
# 🧠Interpretation
• A mean FST near zero (or negative) suggests little to no genetic differentiation between the hatchery and wild populations.
• Some negative FST values are possible due to sampling noise or low allele frequency variance; they are usually interpreted as zero.
• A few high FST values (up to 1) indicate loci with strong differentiation — potentially under selection or drift.
```{r, eval=FALSE}
# Install if not already installed
#install.packages("DT")
# Load the package
library(DT)
# Read your CSV file
hatchery_vs_wild.weir.fst <- read.csv("/home/shared/8TB_HDD_02/thielkla/Karina-chinook/Karina-chinook-v2/project/Top_1__FST_Loci.csv")
# Display interactive table
datatable(hatchery_vs_wild.weir.fst, options = list(pageLength = 10, scrollX = TRUE), rownames = FALSE)
```
=======
# PCA
```{r, engine='bash'}
/home/shared/plink_linux_x86_64_20230116/plink \
--vcf output/merged.vcf.gz \
--make-bed \
--double-id \
--allow-extra-chr \
--out output/merged
/home/shared/plink_linux_x86_64_20230116/plink \
--bfile output/merged \
--pca \
--allow-extra-chr \
--out output/pca
```
```{r}
# Load the eigenvec file
pca <- read.table("pca.eigenvec", header = FALSE)
# View first few sample IDs
head(pca[, 2]) # Column 2 is the IID (sample name)
```
```{r, eval=TRUE}
# Load data
pca <- read.table("pca.eigenvec", header = FALSE)
# Assign column names
colnames(pca) <- c("FID", "IID", paste0("PC", 1:(ncol(pca)-2)))
# Assign colors: "red" for 18H samples, "gray" for others
colors <- ifelse(grepl("18H", pca$IID), "red", "gray")
# Plot PC1 vs PC2 with color
plot(pca$PC1, pca$PC2,
col = colors,
xlab = "PC1", ylab = "PC2",
main = "PCA of Hatchery vs Wild",
pch = 19)
# Optional: add a legend
legend("topright", legend = c("Hatcher", "Wild"), col = c("red", "gray"), pch = 19)
```
# PCA top 1% FSTs
```{r}
write.table(fst_data$SNP, "output/top_snps.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
```
```{r, engine='bash'}
/home/shared/plink_linux_x86_64_20230116/plink \
--bfile output/merged \
--extract top_snps.txt \
--allow-extra-chr \
--pca \
--out output/pca_top1pct
```
```{bash}
head output/merged.bim
```