for i in {01..05}; do
/home/shared/bcftools-1.14/bcftools mpileup -Ou -f \
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.${i}.bam \
| /home/shared/bcftools-1.14/bcftools call -mv -Ov \
-o output/H${i}.vcf
done
for i in {01..05}; do
/home/shared/bcftools-1.14/bcftools mpileup -Ou -f \
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.${i}.bam \
| /home/shared/bcftools-1.14/bcftools call -mv -Ov -o \
output/W${i}.vcf
done
for f in output/*.vcf; do
/home/shared/htslib-1.14/bgzip "$f" # compress to .vcf.gz
/home/shared/htslib-1.14/tabix -p vcf "$f.gz" # index for random access
done
ls output/*.vcf.gz > output/vcflist.txt
/home/shared/bcftools-1.14/bcftools merge \
-l output/vcflist.txt -Oz -o output/merged.vcf.gz
/home/shared/bcftools-1.14/bcftools index output/merged.vcf.gz
/home/shared/bcftools-1.14/bcftools query -l output/merged.vcf.gz
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.01.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.02.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.03.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.04.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.05.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.01.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.02.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.03.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.04.bam
/home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.05.bam
Create text file that list samples
/home/shared/vcftools-0.1.16/bin/vcftools --gzvcf output/merged.vcf.gz \
--weir-fst-pop hatchery.txt \
--weir-fst-pop wild.txt \
--out output/hatchery_vs_wild
head -20 output/hatchery_vs_wild.weir.fst
CHROM POS WEIR_AND_COCKERHAM_FST
Contig0 1975 -nan
Contig0 2017 -nan
Contig0 2025 -nan
Contig0 2034 -nan
Contig0 2044 -nan
Contig0 7951 -nan
Contig0 8075 -nan
Contig0 18797 -nan
Contig0 52863 -nan
Contig0 52875 -nan
Contig0 52903 -nan
Contig0 52942 0.6
Contig0 52944 0.6
Contig0 52980 0.6
Contig0 53000 -nan
Contig0 53001 -nan
Contig0 53015 -nan
Contig0 53035 -nan
Contig0 53070 -nan
• A mean FST near zero (or negative) suggests little to no genetic differentiation between the hatchery and wild populations.
• Some negative FST values are possible due to sampling noise or low allele frequency variance; they are usually interpreted as zero.
• A few high FST values (up to 1) indicate loci with strong differentiation — potentially under selection or drift.
# Install if not already installed
#install.packages("DT")
# Load the package
library(DT)
# Read your CSV file
fst_data <- read.csv("output/Top_1__FST_Loci.csv")
# Display interactive table
datatable(fst_data, options = list(pageLength = 10, scrollX = TRUE), rownames = FALSE)
/home/shared/plink_linux_x86_64_20230116/plink \
--vcf output/merged.vcf.gz \
--make-bed \
--double-id \
--allow-extra-chr \
--out output/merged
/home/shared/plink_linux_x86_64_20230116/plink \
--bfile output/merged \
--pca \
--allow-extra-chr \
--out pca
# Load the eigenvec file
pca <- read.table("pca.eigenvec", header = FALSE)
# View first few sample IDs
head(pca[, 2]) # Column 2 is the IID (sample name)
# Load data
pca <- read.table("pca.eigenvec", header = FALSE)
# Assign column names
colnames(pca) <- c("FID", "IID", paste0("PC", 1:(ncol(pca)-2)))
# Assign colors: "red" for 18H samples, "gray" for others
colors <- ifelse(grepl("18H", pca$IID), "red", "gray")
# Plot PC1 vs PC2 with color
plot(pca$PC1, pca$PC2,
col = colors,
xlab = "PC1", ylab = "PC2",
main = "PCA of Hatchery vs Wild",
pch = 19)
# Optional: add a legend
legend("topright", legend = c("Hatcher", "Wild"), col = c("red", "gray"), pch = 19)