---
title: "04.2b"
output: html_document
date: "2024-11-21"
---
```{bash}
# Print the "Mapping efficiency:" line of every file under the alignment
# output directory, ordered by run directory.
# grep -r prefixes each hit with its path
# (../output/04.2-bismark-align/<run_dir>/<file>:...), so with "/" as the
# field separator the run directory is field 4. Field 2 is always "output"
# and therefore provides no ordering.
grep -r "Mapping efficiency:" "../output/04.2-bismark-align" \
  | sort -t/ -k4,4
```
```{bash}
# List the report file of one run to confirm the
# <run_dir>/<sample>_<score>_PE_report.txt naming used by the chunks below.
report_path="../output/04.2-bismark-align/EF05-EM05-Zygote_score_L0-1.0/EF05-EM05-Zygote_L0-1.0_PE_report.txt"
ls "${report_path}"
```
```{bash}
# Define directories
output_dir="../output/04.2-bismark-align"
summary_file="${output_dir}/parameter_comparison_summary.csv"

#######################################
# Collect the mapping efficiency of every <sample>_score_<params> run
# directory into a CSV with the columns Sample,Score_Min,Alignment_Rate
# (the column names the plotting chunk reads).
# Arguments:
#   $1 - directory containing the *_score_* run directories
#   $2 - path of the CSV file to (re)create
# Outputs:
#   Writes the CSV to $2; warnings and errors go to stderr.
# Returns:
#   0 on success, 1 if $1 is not a directory or $2 cannot be written.
#######################################
summarize_mapping_efficiency() {
  local runs_dir=$1
  local csv=$2
  local dir dir_name sample_name score_min report mapping

  if [[ ! -d "$runs_dir" ]]; then
    printf 'error: output directory not found: %s\n' "$runs_dir" >&2
    return 1
  fi

  # Recreate the file with a header on every run so that re-running the
  # chunk does not append duplicate rows.
  printf 'Sample,Score_Min,Alignment_Rate\n' > "$csv" || return 1

  # Loop through parameter output directories
  for dir in "$runs_dir"/*_score_*; do
    # Skips non-directories and the literal pattern left by an unmatched glob.
    [[ -d "$dir" ]] || continue

    # Extract sample name (text before the first "_") and the score_min tag
    # (text after the first "score_") from the directory name. The tag is
    # kept verbatim: it is part of the report file name, and replacing "_"
    # with "," would both break that path and add extra CSV columns.
    dir_name=${dir##*/}
    sample_name=${dir_name%%_*}
    score_min=${dir_name#*score_}

    # Locate the report file
    report="${dir}/${sample_name}_${score_min}_PE_report.txt"
    if [[ ! -f "$report" ]]; then
      printf 'warning: report not found, skipping: %s\n' "$report" >&2
      continue
    fi

    # Extract the percentage (third whitespace-separated field of the
    # "Mapping efficiency:" line) as a bare number, without the "%".
    mapping=$(awk '/Mapping efficiency:/ { sub(/%/, "", $3); print $3; exit }' "$report")
    if [[ -z "$mapping" ]]; then
      printf 'warning: no mapping efficiency in %s, skipping\n' "$report" >&2
      continue
    fi

    # Append to the summary file
    printf '%s,%s,%s\n' "$sample_name" "$score_min" "$mapping" >> "$csv"
  done
}

summarize_mapping_efficiency "$output_dir" "$summary_file"
```
```{r}
library(ggplot2)
# Load the data.
# The file is read without relying on a header row so that it works both for
# a summary file that starts with "Sample,Score_Min,Alignment_Rate" and for
# one that contains only data rows; the first three columns are taken to be
# sample, score_min tag and alignment rate, in that order.
summary_path <- "../output/04.2-bismark-align/parameter_comparison_summary.csv"
data <- read.csv(summary_path, sep = ",", header = FALSE,
                 colClasses = "character", strip.white = TRUE)
if (ncol(data) < 3) {
  stop("Expected at least 3 columns (Sample, Score_Min, Alignment_Rate) in ",
       summary_path)
}
data <- data[, 1:3]
names(data) <- c("Sample", "Score_Min", "Alignment_Rate")
# Drop a header row if present, and exact duplicate rows left by repeated
# appends to the summary file.
data <- unique(data[data$Sample != "Sample", , drop = FALSE])
# Keep only digits and the decimal point so values such as "62.3%" or
# "mapping efficiency  62.3%" become the number 62.3.
data$Alignment_Rate <- as.numeric(gsub("[^0-9.]", "", data$Alignment_Rate))

# Plot alignment rate.
# Score_Min is a text label (discrete x axis), so the lines must be grouped
# explicitly by sample; without `group`, geom_line() treats every x value as
# its own group and draws nothing.
ggplot(data, aes(x = Score_Min, y = Alignment_Rate,
                 color = Sample, group = Sample)) +
  geom_line() +
  theme_minimal() +
  labs(title = "Alignment Rate vs. Score_Min",
       x = "Score_Min Parameter",
       y = "Alignment Rate (%)")
```