#!/usr/bin/env python3
"""
Full Dataset Context-Dependent Analysis Report Generator

This script generates a comprehensive report for the full dataset analysis
that was just completed, including detailed visualizations and insights.
"""

import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")


class FullAnalysisReportGenerator:
    """Build console, figure and text reports from context-dependent analysis CSVs.

    The constructor reads six CSV files from ``output_dir`` into
    ``self.results`` (a dict of DataFrames); the ``generate_*`` / ``save_*``
    methods then summarise those frames.
    """

    # CSV files read from ``output_dir``, keyed by the name used in
    # ``self.results``. Loaded in this order.
    RESULT_FILES = {
        'methylation_mirna': "methylation_mirna_context.csv",
        'lncrna_mirna': "lncrna_mirna_context.csv",
        'multi_way': "multi_way_interactions.csv",
        'high_mirna_meth': "high_mirna_gene_methylation_correlations.csv",
        'high_mirna_lncrna': "high_mirna_gene_lncrna_correlations.csv",
        'high_mirna_mirna': "high_mirna_gene_mirna_correlations.csv",
    }

    # Dataset dimensions quoted in every report. These are fixed literals,
    # not values derived from the loaded CSVs.
    TOTAL_GENES = 36084
    TOTAL_LNCRNAS = 15900
    TOTAL_MIRNAS = 51
    TOTAL_METHYLATION_SITES = 249
    TOTAL_SAMPLES = 40

    # Output locations (kept as plain strings so printed paths are unchanged).
    FIGURE_PATH = 'output/full_analysis_comprehensive_report.png'
    REPORT_PATH = "output/full_analysis_detailed_report.txt"

    def __init__(self, output_dir="output/context_dependent_analysis"):
        """Initialize the report generator and load all result files.

        Args:
            output_dir: Directory containing the analysis CSV files.

        Raises:
            Whatever ``pandas.read_csv`` raises when a file is missing or
            unreadable (e.g. ``FileNotFoundError``).
        """
        self.output_dir = Path(output_dir)
        self.results = {}
        self.load_results()

    def load_results(self) -> None:
        """Load all analysis results into ``self.results`` and print their sizes."""
        print("📂 Loading full analysis results...")

        for key, filename in self.RESULT_FILES.items():
            self.results[key] = pd.read_csv(self.output_dir / filename)

        results = self.results
        print("✅ Loaded results:")
        print(f" • Methylation-miRNA context: {len(results['methylation_mirna']):,} interactions")
        print(f" • lncRNA-miRNA context: {len(results['lncrna_mirna']):,} interactions")
        print(f" • Multi-way interactions: {len(results['multi_way']):,} analyses")
        print(
            f" • High miRNA context correlations: {len(results['high_mirna_meth']):,}"
            f" + {len(results['high_mirna_lncrna']):,}"
            f" + {len(results['high_mirna_mirna']):,}"
        )

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _interaction_totals(self):
        """Return ``(total, context_dependent)`` counts over both interaction tables."""
        meth = self.results['methylation_mirna']
        lncrna = self.results['lncrna_mirna']
        total = len(meth) + len(lncrna)
        context_dependent = int(
            meth['context_dependent'].sum() + lncrna['context_dependent'].sum()
        )
        return total, context_dependent

    @staticmethod
    def _percentage(part, whole) -> float:
        """Return ``part / whole`` as a percentage, or 0.0 when ``whole`` is zero."""
        # Guard: empty result tables would otherwise divide by zero.
        return part / whole * 100 if whole else 0.0

    def _scope_lines(self) -> list:
        """Return the dataset-scope lines (without bullet prefix) shared by all reports."""
        return [
            f"Total genes analyzed: {self.TOTAL_GENES:,} (100% of dataset)",
            f"Total lncRNAs analyzed: {self.TOTAL_LNCRNAS:,} (100% of dataset)",
            f"Total miRNAs analyzed: {self.TOTAL_MIRNAS:,} (100% of dataset)",
            f"Total methylation sites: {self.TOTAL_METHYLATION_SITES:,} (100% of dataset)",
            f"Total samples: {self.TOTAL_SAMPLES}",
        ]

    @staticmethod
    def _format_interaction(row) -> str:
        """Format one interaction row as ``target | reg1 + reg2 | Improvement: x``."""
        return (
            f"{row['target']} | {row['regulator1']} + {row['regulator2']}"
            f" | Improvement: {row['improvement_from_interaction']:.3f}"
        )

    @staticmethod
    def _show_placeholder(ax, message) -> None:
        """Write a centred 'not available' style message into an otherwise empty axes."""
        ax.text(0.5, 0.5, message, ha='center', va='center', transform=ax.transAxes)

    # ------------------------------------------------------------------
    # Console summary
    # ------------------------------------------------------------------

    def generate_executive_summary(self) -> None:
        """Print the executive summary of the full analysis to stdout."""
        print("\n" + "="*80)
        print("📊 EXECUTIVE SUMMARY - FULL DATASET ANALYSIS")
        print("="*80)

        # Overall statistics
        total_interactions, context_dependent_interactions = self._interaction_totals()
        rate = self._percentage(context_dependent_interactions, total_interactions)

        print("\n🎯 ANALYSIS SCOPE:")
        for line in self._scope_lines():
            print(f" • {line}")

        print("\n📈 INTERACTION RESULTS:")
        print(f" • Total regulatory interactions analyzed: {total_interactions:,}")
        print(f" • Context-dependent interactions: {context_dependent_interactions:,}")
        print(f" • Context-dependence rate: {rate:.1f}%")

        print("\n🔍 KEY FINDINGS:")
        print(f" • Methylation-miRNA context interactions: {len(self.results['methylation_mirna']):,}")
        print(f" • lncRNA-miRNA context interactions: {len(self.results['lncrna_mirna']):,}")
        print(f" • Multi-way regulatory networks: {len(self.results['multi_way']):,}")

        # Performance metrics
        meth_improvement = self.results['methylation_mirna']['improvement_from_interaction'].mean()
        lncrna_improvement = self.results['lncrna_mirna']['improvement_from_interaction'].mean()
        multi_way_rate = self.results['multi_way']['has_significant_interactions'].mean() * 100

        print("\n📊 PERFORMANCE METRICS:")
        print(f" • Mean improvement from methylation-miRNA interactions: {meth_improvement:.3f}")
        print(f" • Mean improvement from lncRNA-miRNA interactions: {lncrna_improvement:.3f}")
        print(f" • Multi-way interaction success rate: {multi_way_rate:.1f}%")

    # ------------------------------------------------------------------
    # Figure
    # ------------------------------------------------------------------

    def generate_detailed_visualizations(self) -> None:
        """Render the 3x3 panel figure and save it to ``FIGURE_PATH``."""
        print("\n🎨 Generating comprehensive visualizations...")

        # Panels in subplot order (row-major, positions 1..9).
        panels = [
            self._plot_context_dependent_distribution,  # 1. Context-dependent distribution
            self._plot_interaction_improvements,        # 2. Interaction improvements
            self._plot_context_strength,                # 3. Context strength
            self._plot_multi_way_interactions,          # 4. Multi-way interactions
            self._plot_regulatory_complexity,           # 5. Regulatory complexity
            self._plot_context_correlations,            # 6. Context-specific correlations
            self._plot_interaction_types,               # 7. Interaction types
            self._plot_performance_metrics,             # 8. Performance metrics
            self._plot_summary_statistics,              # 9. Summary statistics
        ]

        fig = plt.figure(figsize=(24, 20))
        try:
            for position, draw_panel in enumerate(panels, start=1):
                draw_panel(plt.subplot(3, 3, position))

            plt.tight_layout()
            # The target directory is independent of ``output_dir``; create it
            # so saving does not fail when a custom ``output_dir`` is used.
            Path(self.FIGURE_PATH).parent.mkdir(parents=True, exist_ok=True)
            plt.savefig(self.FIGURE_PATH, dpi=300, bbox_inches='tight')
        finally:
            # Release the figure; pyplot otherwise keeps it alive.
            plt.close(fig)

        print("✅ Comprehensive visualization saved: full_analysis_comprehensive_report.png")

    def _plot_context_dependent_distribution(self, ax) -> None:
        """Plot counts of context-dependent vs non-context-dependent interactions."""
        meth_context = self.results['methylation_mirna']['context_dependent'].value_counts()
        lncrna_context = self.results['lncrna_mirna']['context_dependent'].value_counts()

        # Bar heights and tick labels are both derived from this one list so
        # the 'False'/'True' labels always sit under the matching bars.
        categories = [False, True]
        x = np.arange(len(categories))
        width = 0.35

        ax.bar(x - width/2, [meth_context.get(c, 0) for c in categories],
               width, label='Methylation-miRNA', alpha=0.8)
        ax.bar(x + width/2, [lncrna_context.get(c, 0) for c in categories],
               width, label='lncRNA-miRNA', alpha=0.8)

        ax.set_xlabel('Context-Dependent')
        ax.set_ylabel('Number of Interactions')
        ax.set_title('Context-Dependent vs Non-Context-Dependent\nInteractions Distribution')
        ax.set_xticks(x)
        ax.set_xticklabels([str(c) for c in categories])
        ax.legend()
        ax.grid(True, alpha=0.3)

    def _plot_interaction_improvements(self, ax) -> None:
        """Plot overlaid density histograms of ``improvement_from_interaction``."""
        meth_improvements = self.results['methylation_mirna']['improvement_from_interaction']
        lncrna_improvements = self.results['lncrna_mirna']['improvement_from_interaction']

        ax.hist(meth_improvements, bins=50, alpha=0.7, label='Methylation-miRNA', density=True)
        ax.hist(lncrna_improvements, bins=50, alpha=0.7, label='lncRNA-miRNA', density=True)

        ax.set_xlabel('Improvement from Interaction (R²)')
        ax.set_ylabel('Density')
        ax.set_title('Distribution of Interaction Improvements')
        ax.legend()
        ax.grid(True, alpha=0.3)

    def _plot_context_strength(self, ax) -> None:
        """Plot side-by-side box plots of the non-null ``context_strength`` values."""
        meth_strength = self.results['methylation_mirna']['context_strength'].dropna()
        lncrna_strength = self.results['lncrna_mirna']['context_strength'].dropna()

        ax.boxplot([meth_strength, lncrna_strength])
        # Labels are set on the axis rather than via boxplot's ``labels``
        # keyword, which Matplotlib 3.9 deprecated.
        ax.set_xticklabels(['Methylation-miRNA', 'lncRNA-miRNA'])
        ax.set_ylabel('Context Strength')
        ax.set_title('Context Strength Comparison')
        ax.grid(True, alpha=0.3)

    def _plot_multi_way_interactions(self, ax) -> None:
        """Plot a pie chart of significant vs non-significant multi-way analyses."""
        multi_way = self.results['multi_way']

        # Count significant interactions
        total = len(multi_way)
        significant = int(multi_way['has_significant_interactions'].sum()) if total else 0

        if total:
            ax.pie(
                [significant, total - significant],
                labels=[f'Significant\n({significant:,})',
                        f'Non-significant\n({total - significant:,})'],
                autopct='%1.1f%%',
                startangle=90,
            )
        else:
            # A pie chart of all-zero wedges is undefined; show a note instead.
            self._show_placeholder(ax, 'Multi-way interaction data\nnot available')
        ax.set_title('Multi-Way Interaction Success Rate')

    def _plot_regulatory_complexity(self, ax) -> None:
        """Plot a bar chart of ``n_regulators`` value counts, if that column exists."""
        multi_way = self.results['multi_way']

        if 'n_regulators' in multi_way.columns:
            regulator_counts = multi_way['n_regulators'].value_counts().sort_index()
            ax.bar(regulator_counts.index, regulator_counts.values, alpha=0.8)
            ax.set_xlabel('Number of Regulators')
            ax.set_ylabel('Number of Genes')
            ax.set_title('Regulatory Complexity Distribution')
        else:
            self._show_placeholder(ax, 'Regulator count data\nnot available')
            ax.set_title('Regulatory Complexity')

        ax.grid(True, alpha=0.3)

    def _plot_context_correlations(self, ax) -> None:
        """Plot a density histogram of up to 1000 sampled ``correlation`` values."""
        correlations = self.results['high_mirna_meth']

        if len(correlations) > 0 and 'correlation' in correlations.columns:
            # Sample correlations for visualization (unseeded, so the drawn
            # subset can differ between runs when there are more than 1000 rows).
            sampled = correlations.sample(n=min(1000, len(correlations)))
            ax.hist(sampled['correlation'], bins=30, alpha=0.8, density=True)
            ax.set_xlabel('Correlation Coefficient')
            ax.set_ylabel('Density')
            ax.set_title('High miRNA Context:\nGene-Methylation Correlations')
        else:
            self._show_placeholder(ax, 'Correlation data\nnot available')
            ax.set_title('Context-Specific Correlations')

        ax.grid(True, alpha=0.3)

    def _plot_interaction_types(self, ax) -> None:
        """Plot a pie chart of ``interaction_type`` counts, if that column exists."""
        meth = self.results['methylation_mirna']

        if 'interaction_type' in meth.columns:
            meth_types = meth['interaction_type'].value_counts()
            ax.pie(meth_types.values, labels=meth_types.index, autopct='%1.1f%%', startangle=90)
            ax.set_title('Methylation-miRNA\nInteraction Types')
        else:
            self._show_placeholder(ax, 'Interaction type data\nnot available')
            ax.set_title('Interaction Types')

    def _plot_performance_metrics(self, ax) -> None:
        """Plot a bar chart of headline counts (genes, interactions, successes)."""
        total_interactions, context_dependent = self._interaction_totals()
        metrics = {
            'Total Genes': self.TOTAL_GENES,
            'Total Interactions': total_interactions,
            'Context-Dependent': context_dependent,
            'Multi-Way Success': self.results['multi_way']['has_significant_interactions'].sum(),
        }

        ax.bar(list(metrics), list(metrics.values()), alpha=0.8,
               color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])
        ax.set_ylabel('Count')
        ax.set_title('Performance Metrics Summary')
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right')
        ax.grid(True, alpha=0.3)

    def _plot_summary_statistics(self, ax) -> None:
        """Render the summary statistics as a two-column table."""
        ax.axis('tight')
        ax.axis('off')

        # Use the combined rate over both interaction tables so this panel
        # agrees with the executive summary and the text report.
        total_interactions, context_dependent = self._interaction_totals()
        rate = self._percentage(context_dependent, total_interactions)

        header = ['Metric', 'Value']
        rows = [
            ['Total Genes Analyzed', f"{self.TOTAL_GENES:,}"],
            ['Total lncRNAs', f"{self.TOTAL_LNCRNAS:,}"],
            ['Total miRNAs', f"{self.TOTAL_MIRNAS:,}"],
            ['Total Methylation Sites', f"{self.TOTAL_METHYLATION_SITES:,}"],
            ['Total Samples', f"{self.TOTAL_SAMPLES}"],
            ['Methylation-miRNA Interactions', f"{len(self.results['methylation_mirna']):,}"],
            ['lncRNA-miRNA Interactions', f"{len(self.results['lncrna_mirna']):,}"],
            ['Multi-Way Analyses', f"{len(self.results['multi_way']):,}"],
            ['Context-Dependent Rate', f"{rate:.1f}%"],
        ]

        table = ax.table(cellText=rows, colLabels=header, cellLoc='center', loc='center')
        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.scale(1, 2)

        ax.set_title('Full Dataset Analysis Summary', fontsize=14, fontweight='bold')

    # ------------------------------------------------------------------
    # Console insights
    # ------------------------------------------------------------------

    def generate_detailed_insights(self) -> None:
        """Print top interactions and context-strength statistics to stdout."""
        print("\n🔍 GENERATING DETAILED INSIGHTS...")

        # Top context-dependent interactions
        print("\n🏆 TOP CONTEXT-DEPENDENT INTERACTIONS:")

        top_sections = [
            ("\nTop Methylation-miRNA Context Interactions:", 'methylation_mirna'),
            ("\nTop lncRNA-miRNA Context Interactions:", 'lncrna_mirna'),
        ]
        for heading, key in top_sections:
            top_rows = self.results[key].nlargest(5, 'improvement_from_interaction')
            print(heading)
            for _, row in top_rows.iterrows():
                print(f" • {self._format_interaction(row)}")

        # Multi-way interaction insights
        print("\n🔗 MULTI-WAY INTERACTION INSIGHTS:")
        multi_way = self.results['multi_way']
        if 'improvement_from_regulators' in multi_way.columns:
            top_multi = multi_way.nlargest(5, 'improvement_from_regulators')
            print("Top Multi-Way Regulatory Networks:")
            for _, row in top_multi.iterrows():
                print(f" • {row['gene']} | Improvement: {row['improvement_from_regulators']:.3f}")

        # Context strength analysis
        print("\n💪 CONTEXT STRENGTH ANALYSIS:")
        strength_sections = [
            ("Methylation-miRNA context strength:", 'methylation_mirna'),
            ("lncRNA-miRNA context strength:", 'lncrna_mirna'),
        ]
        for heading, key in strength_sections:
            strength = self.results[key]['context_strength'].dropna()
            print(heading)
            print(f" • Mean: {strength.mean():.3f}")
            print(f" • Median: {strength.median():.3f}")
            print(f" • 95th percentile: {strength.quantile(0.95):.3f}")

    # ------------------------------------------------------------------
    # Text report
    # ------------------------------------------------------------------

    def save_detailed_report(self) -> None:
        """Write the detailed text report to ``REPORT_PATH``."""
        report_path = self.REPORT_PATH

        total_interactions, context_dependent = self._interaction_totals()
        rate = self._percentage(context_dependent, total_interactions)
        meth_improvement = self.results['methylation_mirna']['improvement_from_interaction'].mean()
        lncrna_improvement = self.results['lncrna_mirna']['improvement_from_interaction'].mean()
        multi_way_rate = self.results['multi_way']['has_significant_interactions'].mean() * 100

        # The target directory is independent of ``output_dir``; make sure it exists.
        Path(report_path).parent.mkdir(parents=True, exist_ok=True)

        # Explicit UTF-8: the report contains non-ASCII bullet characters.
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("FULL DATASET CONTEXT-DEPENDENT ANALYSIS REPORT\n")
            f.write("=" * 60 + "\n\n")

            f.write("ANALYSIS SCOPE:\n")
            for line in self._scope_lines():
                f.write(f"• {line}\n")
            f.write("\n")

            f.write("INTERACTION RESULTS:\n")
            f.write(f"• Total regulatory interactions: {total_interactions:,}\n")
            f.write(f"• Context-dependent interactions: {context_dependent:,}\n")
            f.write(f"• Context-dependence rate: {rate:.1f}%\n\n")

            f.write("PERFORMANCE METRICS:\n")
            f.write(f"• Mean improvement from methylation-miRNA: {meth_improvement:.3f}\n")
            f.write(f"• Mean improvement from lncRNA-miRNA: {lncrna_improvement:.3f}\n")
            f.write(f"• Multi-way success rate: {multi_way_rate:.1f}%\n\n")

            f.write("TOP INTERACTIONS:\n")
            meth_top = self.results['methylation_mirna'].nlargest(10, 'improvement_from_interaction')
            f.write("Top 10 Methylation-miRNA Context Interactions:\n")
            for i, (_, row) in enumerate(meth_top.iterrows(), 1):
                f.write(f"{i:2d}. {self._format_interaction(row)}\n")

            f.write("\nTop 10 lncRNA-miRNA Context Interactions:\n")
            lncrna_top = self.results['lncrna_mirna'].nlargest(10, 'improvement_from_interaction')
            for i, (_, row) in enumerate(lncrna_top.iterrows(), 1):
                f.write(f"{i:2d}. {self._format_interaction(row)}\n")

        print(f"✅ Detailed report saved: {report_path}")

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def run_complete_report(self) -> None:
        """Run every report step in order: summary, figure, insights, text report."""
        print("🚀 GENERATING COMPREHENSIVE FULL ANALYSIS REPORT")
        print("=" * 60)

        # Generate executive summary
        self.generate_executive_summary()

        # Generate detailed visualizations
        self.generate_detailed_visualizations()

        # Generate detailed insights
        self.generate_detailed_insights()

        # Save detailed report
        self.save_detailed_report()

        print("\n" + "=" * 60)
        print("🎉 COMPREHENSIVE REPORT GENERATION COMPLETED!")
        print("=" * 60)
        print("Generated files:")
        print(" • full_analysis_comprehensive_report.png - Comprehensive visualizations")
        print(" • full_analysis_detailed_report.txt - Detailed text report")
        print(" • All results are in output/context_dependent_analysis/")


def main():
    """Main function to generate the comprehensive report."""
    generator = FullAnalysisReportGenerator()
    generator.run_complete_report()


if __name__ == "__main__":
    main()