call varients for hatchery

for i in {01..05}; do
  /home/shared/bcftools-1.14/bcftools mpileup -Ou -f \
  /home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \
  /home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.${i}.bam \
  | /home/shared/bcftools-1.14/bcftools call -mv -Ov \
  -o output/H${i}.vcf
done

call varients for wildtype

for i in {01..05}; do
  /home/shared/bcftools-1.14/bcftools mpileup -Ou -f \
  /home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \
  /home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.${i}.bam \
  | /home/shared/bcftools-1.14/bcftools call -mv -Ov -o \
  output/W${i}.vcf
done

Merge so there can be a comparison

for f in output/*.vcf; do
  /home/shared/htslib-1.14/bgzip "$f"         # compress to .vcf.gz
  /home/shared/htslib-1.14/tabix -p vcf "$f.gz"  # index for random access
done
ls output/*.vcf.gz > output/vcflist.txt
/home/shared/bcftools-1.14/bcftools merge \
-l output/vcflist.txt -Oz -o output/merged.vcf.gz

/home/shared/bcftools-1.14/bcftools index output/merged.vcf.gz
/home/shared/bcftools-1.14/bcftools query -l output/merged.vcf.gz
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.01.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.02.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.03.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.04.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.05.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.01.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.02.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.03.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.04.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.05.bam

Compare FST

Create text file that list samples

/home/shared/vcftools-0.1.16/bin/vcftools --gzvcf output/merged.vcf.gz \
  --weir-fst-pop hatchery.txt \
  --weir-fst-pop wild.txt \
  --out output/hatchery_vs_wild
head -20 output/hatchery_vs_wild.weir.fst 
CHROM   POS WEIR_AND_COCKERHAM_FST
Contig0 1975    -nan
Contig0 2017    -nan
Contig0 2025    -nan
Contig0 2034    -nan
Contig0 2044    -nan
Contig0 7951    -nan
Contig0 8075    -nan
Contig0 18797   -nan
Contig0 52863   -nan
Contig0 52875   -nan
Contig0 52903   -nan
Contig0 52942   0.6
Contig0 52944   0.6
Contig0 52980   0.6
Contig0 53000   -nan
Contig0 53001   -nan
Contig0 53015   -nan
Contig0 53035   -nan
Contig0 53070   -nan

🧠 Interpretation

•   A mean FST near zero (or negative) suggests little to no genetic differentiation between the hatchery and wild populations.


•   Some negative FST values are possible due to sampling noise or low allele frequency variance; they are usually interpreted as zero.


•   A few high FST values (up to 1) indicate loci with strong differentiation — potentially under selection or drift.
# Install if not already installed
#install.packages("DT")

# Load the package
library(DT)

# Read your CSV file
fst_data <- read.csv("output/Top_1__FST_Loci.csv")

# Display interactive table
datatable(fst_data, options = list(pageLength = 10, scrollX = TRUE), rownames = FALSE)

PCA

/home/shared/plink_linux_x86_64_20230116/plink \
--vcf output/merged.vcf.gz \
--make-bed \
--double-id \
--allow-extra-chr \
--out output/merged


/home/shared/plink_linux_x86_64_20230116/plink \
--bfile output/merged \
--pca \
--allow-extra-chr \
--out pca
# Load the eigenvec file
pca <- read.table("pca.eigenvec", header = FALSE)

# View first few sample IDs
head(pca[, 2])  # Column 2 is the IID (sample name)
# Load data
pca <- read.table("pca.eigenvec", header = FALSE)

# Assign column names
colnames(pca) <- c("FID", "IID", paste0("PC", 1:(ncol(pca)-2)))

# Assign colors: "red" for 18H samples, "gray" for others
colors <- ifelse(grepl("18H", pca$IID), "red", "gray")

# Plot PC1 vs PC2 with color
plot(pca$PC1, pca$PC2, 
     col = colors,
     xlab = "PC1", ylab = "PC2", 
     main = "PCA of Hatchery vs Wild", 
     pch = 19)

# Optional: add a legend
legend("topright", legend = c("Hatcher", "Wild"), col = c("red", "gray"), pch = 19)