"""Plot a heatmap of exon counts by exon position and methylation level.

The script reads an annotated exon methylation table, derives each exon's
position from its name when the ``exon_position`` column is absent, counts
exons per (position, methylation level) pair and renders the log-scaled
counts as a heatmap that is saved to disk and shown on screen.
"""

import re

import numpy as np
import pandas as pd

# Load your data
# Adjust the filename if needed
INPUT_FILE = 'annotated_exon_methylation.csv'
OUTPUT_FILE = 'exon_position_heatmap_by_methylation_level.png'

# Heatmap column order, from least to most methylated.
LEVEL_ORDER = ('low', 'moderate', 'high')

# Axis tick label shown for each methylation level.
LEVEL_LABELS = {
    'low': 'low (<=10%)',
    'moderate': 'moderate (11-79%)',
    'high': 'high (>=80%)',
}

# A hyphen followed by digits at the very end of the exon name.
_EXON_SUFFIX = re.compile(r'-(\d+)$')


def get_exon_pos(exon_name):
    """Return the integer that follows the last hyphen of *exon_name*.

    Args:
        exon_name: Exon identifier string, or a missing value.

    Returns:
        The trailing number as an ``int``, or ``None`` when the name is
        missing or does not end in ``-<digits>``.
    """
    if pd.isnull(exon_name):
        return None
    m = _EXON_SUFFIX.search(exon_name)
    return int(m.group(1)) if m else None


def build_heatmap_counts(df, level_order=LEVEL_ORDER):
    """Count exons per (exon position, methylation level).

    The input frame is not modified.

    Args:
        df: Frame with a ``methylation_level`` column and either an
            ``exon_position`` column or an ``exon_name`` column from which
            the position is derived with :func:`get_exon_pos`.
        level_order: Methylation levels to keep, in output column order.

    Returns:
        Frame indexed by exon position with exactly one column per entry of
        *level_order*; levels that never occur are present and filled with 0.
    """
    levels = list(level_order)
    table = df
    derived = 'exon_position' not in table.columns
    if derived:
        # ``assign`` returns a new frame, so the caller's data stays untouched.
        table = table.assign(
            exon_position=table['exon_name'].apply(get_exon_pos)
        )

    # Rows without a position or with a level outside *level_order* are
    # excluded from the counts.
    keep = (
        table['exon_position'].notna()
        & table['methylation_level'].isin(levels)
    )
    table = table.loc[keep, ['exon_position', 'methylation_level']]
    if derived:
        # Unparseable names turn the derived column into floats; every
        # surviving value came from ``int()``, so casting back is lossless
        # and keeps the tick labels as "1" rather than "1.0".
        table = table.astype({'exon_position': int})

    counts = (
        table.groupby(['exon_position', 'methylation_level'])
        .size()
        .unstack(fill_value=0)
    )
    # Guarantee every requested level is a column, in the requested order,
    # even when a level never occurs in the data.
    return counts.reindex(columns=levels, fill_value=0)


def plot_heatmap(counts, output_path=OUTPUT_FILE):
    """Render log10(count + 1) of *counts* as a heatmap, save it and show it.

    Args:
        counts: Count table as returned by :func:`build_heatmap_counts`.
        output_path: Path the figure is written to.
    """
    # Imported here so the data helpers above can be imported and used
    # without the plotting libraries being loaded.
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Log-transform the counts for better visualization; the +1 keeps empty
    # cells at 0 instead of -inf.
    counts_log = np.log10(counts + 1)

    # Plot heatmap without annotation numbers
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        counts_log,
        cmap='viridis',
        annot=False,
        ax=ax,
        cbar_kws={'label': 'log10(count + 1)'},
    )
    ax.set_title('Log10(Count) of Exons by Position and Methylation Level')

    # Custom x-axis labels
    ax.set_xlabel('Methylation Level')
    ax.set_ylabel('Exon Position')
    ax.set_xticklabels(
        [LEVEL_LABELS.get(level, str(level)) for level in counts.columns]
    )

    fig.tight_layout()
    fig.savefig(output_path)
    plt.show()


def main():
    """Load the methylation table and draw the exon-position heatmap."""
    df = pd.read_csv(INPUT_FILE, index_col=0)
    counts = build_heatmap_counts(df)
    if counts.empty:
        raise SystemExit(
            f'No rows with an exon position and a known methylation level '
            f'in {INPUT_FILE}'
        )
    plot_heatmap(counts)


if __name__ == '__main__':
    main()