#!/usr/bin/env python3
"""
Analyze PowerPoint presentation structure.
"""

import sys
import json
from pathlib import Path
from collections import Counter

def _classify_shape(shape):
    """Return the per-slide counter key that *shape* belongs to, or None.

    The checks run in a fixed priority order (text, picture, chart, table),
    so a shape is counted in at most one category.
    """
    if hasattr(shape, "text"):
        return "text_shapes"
    try:
        if hasattr(shape, "image"):
            return "picture_shapes"
    except ValueError:
        # NOTE(review): hasattr() only swallows AttributeError. python-pptx
        # appears to raise ValueError from Picture.image when the picture has
        # no embedded image (e.g. linked); still a picture -- confirm.
        return "picture_shapes"
    # Use the boolean has_chart / has_table flags instead of probing .chart /
    # .table: python-pptx's GraphicFrame.chart raises ValueError (which
    # hasattr() does not catch) when the frame holds something else, e.g. a
    # table, and that used to abort the whole analysis.
    if getattr(shape, "has_chart", False):
        return "chart_shapes"
    if getattr(shape, "has_table", False):
        return "table_shapes"
    return None


def analyze_pptx_structure(pptx_path):
    """Analyze the structure of a PPTX presentation.

    Args:
        pptx_path: Path to the presentation; passed to pptx.Presentation.

    Returns:
        A dict with the keys "filename", "total_slides", "slide_layouts"
        (Counter of layout names), "content_types" (Counter of shape class
        names), "word_count", "slide_titles" (one entry per slide, "" when
        the slide has no title) and "content_by_slide" (one dict per slide).
        Returns None, after printing an error message, when python-pptx is
        not installed or the file cannot be processed.
    """
    try:
        from pptx import Presentation
    except ImportError:
        print("ERROR: python-pptx library not installed.")
        print("Install with: pip install python-pptx")
        return None

    try:
        prs = Presentation(pptx_path)

        analysis = {
            "filename": Path(pptx_path).name,
            "total_slides": len(prs.slides),
            "slide_layouts": Counter(),
            "content_types": Counter(),
            "word_count": 0,
            "slide_titles": [],
            "content_by_slide": [],
        }

        for slide_number, slide in enumerate(prs.slides, start=1):
            layout_name = slide.slide_layout.name
            slide_analysis = {
                "slide_number": slide_number,
                "layout": str(layout_name),
                "title": "",
                "shapes": len(slide.shapes),
                "text_shapes": 0,
                "picture_shapes": 0,
                "chart_shapes": 0,
                "table_shapes": 0,
                "word_count": 0,
            }

            # Track layout
            analysis["slide_layouts"][layout_name] += 1

            # Extract title. Its words are counted here, so the shape loop
            # below must skip the title shape itself.
            title_shape = slide.shapes.title
            title_id = None
            if title_shape is not None:
                title_id = getattr(title_shape, "shape_id", None)
                title_text = title_shape.text.strip()
                slide_analysis["title"] = title_text
                slide_analysis["word_count"] += len(title_text.split())
            # Record a title for every slide ("" when missing) so that list
            # position i always corresponds to slide i + 1.
            analysis["slide_titles"].append(slide_analysis["title"])

            # Analyze shapes
            for shape in slide.shapes:
                analysis["content_types"][type(shape).__name__] += 1

                category = _classify_shape(shape)
                if category is None:
                    continue
                slide_analysis[category] += 1
                if category != "text_shapes":
                    continue

                # Identify the title by shape id rather than by comparing
                # text: text comparison double-counted titles with
                # surrounding whitespace and dropped any body shape whose
                # text happened to equal the title.
                is_title = (
                    title_id is not None
                    and getattr(shape, "shape_id", None) == title_id
                )
                if not is_title:
                    slide_analysis["word_count"] += len(shape.text.split())

            analysis["word_count"] += slide_analysis["word_count"]
            analysis["content_by_slide"].append(slide_analysis)

        return analysis

    except Exception as e:
        # Top-level boundary for this tool: report the failure and signal it
        # to the caller with None instead of propagating.
        print(f"ERROR processing {pptx_path}: {e}")
        return None

def generate_report(analysis):
    """Generate a human-readable report.

    Args:
        analysis: Dict shaped like the result of analyze_pptx_structure. The
            keys read here are "filename", "total_slides", "word_count",
            "slide_layouts" and "content_types" (Counters) and
            "content_by_slide" (list of per-slide dicts).

    Returns:
        The report as a single newline-joined string.
    """
    report = []

    report.append("📊 PRESENTATION ANALYSIS REPORT")
    report.append("=================================")
    report.append(f"File: {analysis['filename']}")
    report.append(f"Total slides: {analysis['total_slides']}")
    report.append(f"Total words: {analysis['word_count']}")
    report.append("")

    report.append("📈 SLIDE LAYOUT DISTRIBUTION:")
    total_slides = analysis['total_slides']
    for layout, count in analysis['slide_layouts'].most_common():
        # Same zero guard as the content-type section below.
        percentage = (count / total_slides) * 100 if total_slides > 0 else 0
        report.append(f"  {layout}: {count} slides ({percentage:.1f}%)")
    report.append("")

    report.append("🎨 CONTENT TYPE DISTRIBUTION:")
    total_shapes = sum(analysis['content_types'].values())
    for content_type, count in analysis['content_types'].most_common(5):
        percentage = (count / total_shapes) * 100 if total_shapes > 0 else 0
        report.append(f"  {content_type}: {count} shapes ({percentage:.1f}%)")
    report.append("")

    report.append("📋 SLIDE TITLES:")
    # Take number and title from the per-slide records rather than from the
    # position in analysis['slide_titles']: that list may omit untitled
    # slides, which would shift every later title onto the wrong slide number.
    for slide in analysis['content_by_slide']:
        title = slide['title'] or '[No title]'
        report.append(f"  Slide {slide['slide_number']}: {title}")
    report.append("")

    # "Top 5" here means the first five slides in presentation order.
    report.append("🔍 CONTENT BY SLIDE (Top 5):")
    for slide in analysis['content_by_slide'][:5]:
        report.append(f"  Slide {slide['slide_number']}: {slide['title'] or 'No title'}")
        report.append(f"    Layout: {slide['layout']}")
        report.append(f"    Shapes: {slide['shapes']} total")
        if slide['text_shapes'] > 0:
            report.append(f"    Text: {slide['text_shapes']} shapes, {slide['word_count']} words")
        if slide['picture_shapes'] > 0:
            report.append(f"    Images: {slide['picture_shapes']}")
        if slide['chart_shapes'] > 0:
            report.append(f"    Charts: {slide['chart_shapes']}")
        if slide['table_shapes'] > 0:
            report.append(f"    Tables: {slide['table_shapes']}")
        report.append("")

    return "\n".join(report)

def main():
    """Command-line entry point.

    Reads the presentation path (required) and an optional JSON output path
    from sys.argv, prints the text report, and writes the analysis as JSON
    when an output path was given. Exits with status 1 on missing arguments,
    a missing input file, or a failed analysis.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python3 analyze_pptx_structure.py <presentation.pptx> [output.json]")
        sys.exit(1)

    pptx_path = cli_args[0]
    output_path = cli_args[1] if len(cli_args) > 1 else None

    source = Path(pptx_path)
    if not source.exists():
        print(f"ERROR: File not found: {pptx_path}")
        sys.exit(1)

    analysis = analyze_pptx_structure(pptx_path)
    if not analysis:
        # The analyzer has already printed the reason for the failure.
        sys.exit(1)

    # Generate and print report
    print(generate_report(analysis))

    # Save JSON only if requested
    if not output_path:
        return
    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(analysis, out_file, indent=2, ensure_ascii=False)
    print(f"\nDetailed analysis saved to: {output_path}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()