--- title: "Project" output: html_document: df_print: paged --- ```{r setup, include=FALSE} knitr::opts_chunk$set( echo = TRUE, # Display code chunks eval = FALSE, # Evaluate code chunks warning = FALSE, # Hide warnings message = FALSE, # Hide messages fig.width = 6, # Set plot width in inches fig.height = 4, # Set plot height in inches fig.align = "center", # Align plots to the center comment = "" # Prevents appending '##' to beginning of lines in code output ) ``` # call varients for hatchery ```{r, engine='bash'} for i in {01..05}; do /home/shared/bcftools-1.14/bcftools mpileup -Ou -f \ /home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \ /home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSBM18H/CSMB18H.${i}.bam \ | /home/shared/bcftools-1.14/bcftools call -mv -Ov \ -o ~/Karina-chinook/output/H${i}.vcf done ``` # call varients for wildtype ```{r, engine='bash'} for i in {01..05}; do /home/shared/bcftools-1.14/bcftools mpileup -Ou -f \ /home/shared/8TB_HDD_02/thielkla/Karina-chinook/Olurida_v081.fa \ /home/shared/8TB_HDD_02/thielkla/Karina-chinook/CSMB17W/CSMB17W.${i}.bam \ | /home/shared/bcftools-1.14/bcftools call -mv -Ov \ -o ~/Karina-chinook/output/W${i}.vcf done ``` # Merge so there can be a comparison ```{r, engine='bash'} for f in ~/Karina-chinook/output/*.vcf; do /home/shared/htslib-1.14/bgzip "$f" # compress to .vcf.gz /home/shared/htslib-1.14/tabix -p vcf "$f.gz" # index for random access done ``` ```{r, engine='bash'} ls ~/Karina-chinook/output/*.vcf.gz > ~/Karina-chinook/output/vcflist.txt ``` ```{r, engine='bash'} /home/shared/bcftools-1.14/bcftools merge \ -l ~/Karina-chinook/output/vcflist.txt -Oz -o ~/Karina-chinook/output/merged.vcf.gz /home/shared/bcftools-1.14/bcftools index ~/Karina-chinook/output/merged.vcf.gz ``` ```{r, engine='bash', eval=TRUE} /home/shared/bcftools-1.14/bcftools query -l ~/Karina-chinook/output/merged.vcf.gz ``` # Compare FST Create text file that list samples ```{r, engine='bash'} /home/shared/vcftools-0.1.16/bin/vcftools --gzvcf ~/Karina-chinook/output/merged.vcf.gz \ --weir-fst-pop hatchery.txt \ --weir-fst-pop wild.txt \ --out ~/Karina-chinook/output/hatchery_vs_wild ``` ```{r, engine='bash', eval=TRUE} head -20 ~/Karina-chinook/output/hatchery_vs_wild.weir.fst ``` # 🧠 Interpretation • A mean FST near zero (or negative) suggests little to no genetic differentiation between the hatchery and wild populations. • Some negative FST values are possible due to sampling noise or low allele frequency variance; they are usually interpreted as zero. • A few high FST values (up to 1) indicate loci with strong differentiation — potentially under selection or drift. ```{r, eval=FALSE} # Install if not already installed #install.packages("DT") # Load the package library(DT) # Read your CSV file hatchery_vs_wild.weir.fst <- read.csv("/home/shared/8TB_HDD_02/thielkla/Karina-chinook/Karina-chinook-v2/project/Top_1__FST_Loci.csv") # Display interactive table datatable(hatchery_vs_wild.weir.fst, options = list(pageLength = 10, scrollX = TRUE), rownames = FALSE) ``` ======= # PCA ```{r, engine='bash'} /home/shared/plink_linux_x86_64_20230116/plink \ --vcf output/merged.vcf.gz \ --make-bed \ --double-id \ --allow-extra-chr \ --out output/merged /home/shared/plink_linux_x86_64_20230116/plink \ --bfile output/merged \ --pca \ --allow-extra-chr \ --out output/pca ``` ```{r} # Load the eigenvec file pca <- read.table("pca.eigenvec", header = FALSE) # View first few sample IDs head(pca[, 2]) # Column 2 is the IID (sample name) ``` ```{r, eval=TRUE} # Load data pca <- read.table("pca.eigenvec", header = FALSE) # Assign column names colnames(pca) <- c("FID", "IID", paste0("PC", 1:(ncol(pca)-2))) # Assign colors: "red" for 18H samples, "gray" for others colors <- ifelse(grepl("18H", pca$IID), "red", "gray") # Plot PC1 vs PC2 with color plot(pca$PC1, pca$PC2, col = colors, xlab = "PC1", ylab = "PC2", main = "PCA of Hatchery vs Wild", pch = 19) # Optional: add a legend legend("topright", legend = c("Hatcher", "Wild"), col = c("red", "gray"), pch = 19) ``` # PCA top 1% FSTs ```{r} write.table(fst_data$SNP, "output/top_snps.txt", row.names = FALSE, col.names = FALSE, quote = FALSE) ``` ```{r, engine='bash'} /home/shared/plink_linux_x86_64_20230116/plink \ --bfile output/merged \ --extract top_snps.txt \ --allow-extra-chr \ --pca \ --out output/pca_top1pct ``` ```{bash} head output/merged.bim ```