#!/usr/bin/env python3
"""
Calculate conversion rates at each funnel stage:
Traffic → Lead → MQL → Customer
"""

import json
from pathlib import Path
from collections import defaultdict

# Fallback GA4 session total used when the extracted data has no
# 'sessions' entry under 'ga4'.
# NOTE(review): presumably a previously observed GA4 total for this
# site -- confirm it is still the right fallback.
_DEFAULT_GA4_SESSIONS = 125585


def _percentage(numerator, denominator):
    """Return numerator/denominator as a percentage rounded to 2 places.

    Returns the integer 0 when the denominator is not positive, so an empty
    funnel stage never triggers a division by zero.
    """
    if denominator > 0:
        return round(numerator / denominator * 100, 2)
    return 0


def calculate_conversion_rates(extracted_data):
    """Calculate monthly and overall funnel conversion rates.

    Args:
        extracted_data: Mapping read with the following keys (all optional):
            'gsc'     -- month -> dict; its 'clicks' value is used.
            'hubspot' -- dict with 'leads', 'mqls' and 'customers', each a
                         month -> count mapping.
            'ga4'     -- dict whose 'sessions' value is the total session
                         count (falls back to a built-in default).

    Returns:
        A dict with two keys:
            'monthly' -- month -> {'sessions', 'clicks', 'leads', 'mqls',
                         'customers', 'conversion_rates'} for every month
                         key that starts with '2025'.
            'overall' -- totals and conversion rates across all months
                         present in the input (not only 2025 keys).
        All rates are percentages rounded to two decimals; a rate whose
        denominator is zero is reported as 0.
    """
    gsc_data = extracted_data.get('gsc', {})
    hubspot_data = extracted_data.get('hubspot', {})
    ga4_data = extracted_data.get('ga4', {})

    # Monthly funnel counts.
    leads = hubspot_data.get('leads', {})
    mqls = hubspot_data.get('mqls', {})
    customers = hubspot_data.get('customers', {})
    clicks = {month: entry.get('clicks', 0) for month, entry in gsc_data.items()}

    total_ga4_sessions = ga4_data.get('sessions', _DEFAULT_GA4_SESSIONS)

    # Monthly sessions are not available directly, so they are estimated by
    # scaling each month's GSC clicks with the overall sessions-per-click
    # ratio taken from the GA4 total.
    total_clicks = sum(clicks.values())
    if total_clicks > 0:
        clicks_to_sessions_ratio = total_ga4_sessions / total_clicks
    else:
        clicks_to_sessions_ratio = 1.0

    # Every month seen in any source, restricted to 2025.
    all_months = set(leads) | set(mqls) | set(customers) | set(clicks)
    all_months = sorted(m for m in all_months if m.startswith('2025'))

    monthly_rates = {}
    for month in all_months:
        month_leads = leads.get(month, 0)
        month_mqls = mqls.get(month, 0)
        month_customers = customers.get(month, 0)
        month_clicks = clicks.get(month, 0)
        month_sessions = int(month_clicks * clicks_to_sessions_ratio) if month_clicks > 0 else 0

        monthly_rates[month] = {
            'sessions': month_sessions,
            'clicks': month_clicks,
            'leads': month_leads,
            'mqls': month_mqls,
            'customers': month_customers,
            'conversion_rates': {
                'traffic_to_lead': _percentage(month_leads, month_sessions),
                'lead_to_mql': _percentage(month_mqls, month_leads),
                'mql_to_customer': _percentage(month_customers, month_mqls),
                'traffic_to_mql': _percentage(month_mqls, month_sessions),
                'traffic_to_customer': _percentage(month_customers, month_sessions),
            },
        }

    # Overall totals across every month present in the input.
    total_leads = sum(leads.values())
    total_mqls = sum(mqls.values())
    total_customers = sum(customers.values())
    # The overall session count is the GA4 total itself: it is the figure the
    # monthly estimates are scaled against. (This name was previously never
    # assigned, which made the function raise NameError.)
    total_sessions = total_ga4_sessions

    overall_rates = {
        'total_sessions': total_sessions,
        'total_clicks': total_clicks,
        'total_leads': total_leads,
        'total_mqls': total_mqls,
        'total_customers': total_customers,
        'traffic_to_lead': _percentage(total_leads, total_sessions),
        'lead_to_mql': _percentage(total_mqls, total_leads),
        'mql_to_customer': _percentage(total_customers, total_mqls),
        'traffic_to_mql': _percentage(total_mqls, total_sessions),
        'traffic_to_customer': _percentage(total_customers, total_sessions),
    }

    return {
        'monthly': monthly_rates,
        'overall': overall_rates
    }

def calculate_growth_trends(monthly_rates):
    """Compute percentage change between consecutive months.

    Months are compared in sorted key order. For each month after the
    first, the result holds the growth of sessions, leads, MQLs and
    customers relative to the preceding month, rounded to two decimals.
    A metric whose previous value is not positive is reported as 0.
    """
    # (source field, output key) pairs, in the order they appear in the result.
    tracked = (
        ('sessions', 'sessions_growth'),
        ('leads', 'leads_growth'),
        ('mqls', 'mqls_growth'),
        ('customers', 'customers_growth'),
    )

    ordered = sorted(monthly_rates)
    trends = {}

    # Walk the months pairwise: (first, second), (second, third), ...
    for earlier, later in zip(ordered, ordered[1:]):
        before = monthly_rates[earlier]
        after = monthly_rates[later]

        entry = {}
        for field, label in tracked:
            base = before[field]
            current = after[field]
            if base > 0:
                change = (current - base) / base * 100
            else:
                # No meaningful baseline to grow from.
                change = 0
            entry[label] = round(change, 2)

        trends[later] = entry

    return trends

def main():
    """Run the conversion analysis end to end.

    Reads extracted_metrics.json from the sibling 06-DATA-ANALYSIS
    directory, computes conversion rates and month-over-month growth,
    writes the combined result to conversion_analysis.json in the same
    directory and prints a short summary.

    Returns:
        The combined analysis dict, or None when the input file is missing.
    """
    print("Calculating conversion rates...")

    # Input and output both live in the analysis folder next to this
    # script's directory.
    analysis_dir = Path(__file__).parent.absolute().parent / "06-DATA-ANALYSIS"
    data_file = analysis_dir / "extracted_metrics.json"
    output_file = analysis_dir / "conversion_analysis.json"

    if not data_file.exists():
        print(f"Error: {data_file} not found. Run extract_metrics.py first.")
        return None

    with data_file.open('r', encoding='utf-8') as source:
        extracted_data = json.load(source)

    # Conversion rates first, then growth trends derived from the monthly part.
    conversion_data = calculate_conversion_rates(extracted_data)
    conversion_data['growth'] = calculate_growth_trends(conversion_data['monthly'])

    with output_file.open('w', encoding='utf-8') as sink:
        json.dump(conversion_data, sink, indent=2, ensure_ascii=False)

    overall = conversion_data['overall']
    print(f"\n✓ Conversion rates calculated and saved to {output_file}")
    print(f"\nOverall Conversion Rates:")
    print(f"  Traffic → Lead: {overall['traffic_to_lead']}%")
    print(f"  Lead → MQL: {overall['lead_to_mql']}%")
    print(f"  MQL → Customer: {overall['mql_to_customer']}%")
    print(f"  Traffic → Customer: {overall['traffic_to_customer']}%")

    return conversion_data

# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

