---
title: "Project"
output:
  html_document:
    df_print: paged
---


```{r setup, include=FALSE}
knitr::opts_chunk$set(
  echo = TRUE,         # Display code chunks
  eval = FALSE,         # Evaluate code chunks
  warning = FALSE,     # Hide warnings
  message = FALSE,     # Hide messages
  fig.width = 6,       # Set plot width in inches
  fig.height = 4,      # Set plot height in inches
  fig.align = "center", # Align plots to the center
  comment = ""         # Prevents appending '##' to beginning of lines in code output
)
```


# call varients for hatchery 

```{r, engine='bash'}
for i in {01..05}; do
  /home/shared/bcftools-1.14/bcftools mpileup -Ou -f \
  /home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \
  /home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.${i}.bam \
  | /home/shared/bcftools-1.14/bcftools call -mv -Ov \
  -o ~/Karina-chinook/output/H${i}.vcf
done

```

# call varients for wildtype
```{r, engine='bash'}
for i in {01..05}; do
  /home/shared/bcftools-1.14/bcftools mpileup -Ou -f \
  /home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \
  /home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.${i}.bam \
  | /home/shared/bcftools-1.14/bcftools call -mv -Ov \
  -o ~/Karina-chinook/output/W${i}.vcf
done

```

# Merge so there can be a comparison 

```{r, engine='bash'}
for f in ~/Karina-chinook/output/*.vcf; do
  /home/shared/htslib-1.14/bgzip "$f"         # compress to .vcf.gz
  /home/shared/htslib-1.14/tabix -p vcf "$f.gz"  # index for random access
done
```


```{r, engine='bash'}
ls ~/Karina-chinook/output/*.vcf.gz > ~/Karina-chinook/output/vcflist.txt
```


```{r, engine='bash'}
/home/shared/bcftools-1.14/bcftools merge \
-l ~/Karina-chinook/output/vcflist.txt -Oz -o ~/Karina-chinook/output/merged.vcf.gz

/home/shared/bcftools-1.14/bcftools index ~/Karina-chinook/output/merged.vcf.gz
```


```{r, engine='bash', eval=TRUE}
/home/shared/bcftools-1.14/bcftools query -l ~/Karina-chinook/output/merged.vcf.gz
```

# Compare FST

Create text file that list samples  

```{r, engine='bash'}
/home/shared/vcftools-0.1.16/bin/vcftools --gzvcf ~/Karina-chinook/output/merged.vcf.gz \
  --weir-fst-pop hatchery.txt \
  --weir-fst-pop wild.txt \
  --out ~/Karina-chinook/output/hatchery_vs_wild
```

```{r, engine='bash', eval=TRUE}
head -20 ~/Karina-chinook/output/hatchery_vs_wild.weir.fst 
```
# 🧠 Interpretation
	•	A mean FST near zero (or negative) suggests little to no genetic differentiation between the hatchery and wild populations.
	
	
	•	Some negative FST values are possible due to sampling noise or low allele frequency variance; they are usually interpreted as zero.
	
	
	•	A few high FST values (up to 1) indicate loci with strong differentiation — potentially under selection or drift.
	
	
```{r, eval=FALSE}
# Install if not already installed
#install.packages("DT")

# Load the package
library(DT)


# Read your CSV file
hatchery_vs_wild.weir.fst <- read.csv("/home/shared/8TB_HDD_02/thielkla/Karina-chinook/Karina-chinook-v2/project/Top_1__FST_Loci.csv")


# Display interactive table
datatable(hatchery_vs_wild.weir.fst, options = list(pageLength = 10, scrollX = TRUE), rownames = FALSE)
```
=======
# PCA 
	
```{r, engine='bash'}
/home/shared/plink_linux_x86_64_20230116/plink \
--vcf output/merged.vcf.gz \
--make-bed \
--double-id \
--allow-extra-chr \
--out output/merged


/home/shared/plink_linux_x86_64_20230116/plink \
--bfile output/merged \
--pca \
--allow-extra-chr \
--out output/pca
```
	
```{r}
# Load the eigenvec file
pca <- read.table("pca.eigenvec", header = FALSE)

# View first few sample IDs
head(pca[, 2])  # Column 2 is the IID (sample name)
```
	
```{r, eval=TRUE}
# Load data
pca <- read.table("pca.eigenvec", header = FALSE)

# Assign column names
colnames(pca) <- c("FID", "IID", paste0("PC", 1:(ncol(pca)-2)))

# Assign colors: "red" for 18H samples, "gray" for others
colors <- ifelse(grepl("18H", pca$IID), "red", "gray")

# Plot PC1 vs PC2 with color
plot(pca$PC1, pca$PC2, 
     col = colors,
     xlab = "PC1", ylab = "PC2", 
     main = "PCA of Hatchery vs Wild", 
     pch = 19)

# Optional: add a legend
legend("topright", legend = c("Hatcher", "Wild"), col = c("red", "gray"), pch = 19)
```
	
# PCA top 1% FSTs

```{r}
write.table(fst_data$SNP, "output/top_snps.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
```

```{r, engine='bash'}
/home/shared/plink_linux_x86_64_20230116/plink \
      --bfile output/merged \
      --extract top_snps.txt \
      --allow-extra-chr \
      --pca \
      --out output/pca_top1pct
```


```{bash}
head output/merged.bim
```