#!/usr/bin/env python3
"""
Generate monthly forecasts for 2026 combining:
- Baseline growth (Q4 momentum)
- Initiative impacts (templates, tools, comparisons, blog)
- Seasonality adjustments
- Conversion rate improvements
"""

import json
from pathlib import Path
from datetime import datetime

def load_q4_trajectory():
    """Read and return the parsed Q4 trajectory analysis JSON.

    The file is resolved relative to this script:
    ``<script dir>/../06-DATA-ANALYSIS/q4_trajectory_analysis.json``.
    """
    analysis_dir = Path(__file__).parent.parent / "06-DATA-ANALYSIS"
    trajectory_path = analysis_dir / "q4_trajectory_analysis.json"
    with trajectory_path.open('r', encoding='utf-8') as handle:
        return json.load(handle)

def load_initiative_impacts():
    """Read and return the parsed initiative impact model JSON.

    The file is resolved relative to this script:
    ``<script dir>/../06-DATA-ANALYSIS/initiative_impacts.json``.
    """
    analysis_dir = Path(__file__).parent.parent / "06-DATA-ANALYSIS"
    impacts_path = analysis_dir / "initiative_impacts.json"
    with impacts_path.open('r', encoding='utf-8') as handle:
        return json.load(handle)

def get_seasonality_factors():
    """Return the seasonality adjustment factor for each quarter label.

    Keys are 'Q1'..'Q4'; each value is a multiplier where 1.0 would mean
    "no adjustment".
    """
    quarterly_factors = (
        ('Q1', 1.08),  # +8%: post-holiday recovery, budget planning
        ('Q2', 1.12),  # +12%: spring growth, mid-year planning
        ('Q3', 0.92),  # -8%: summer slowdown, vacation impact
        ('Q4', 1.18),  # +18%: year-end push, budget spending
    )
    return dict(quarterly_factors)

def get_quarter(month_str):
    """Map a 'YYYY-MM' month string to its calendar quarter label.

    Args:
        month_str: Month in 'YYYY-MM' form, e.g. '2026-04'. Only the second
            dash-separated field is read.

    Returns:
        One of 'Q1', 'Q2', 'Q3', 'Q4'.

    Raises:
        ValueError: If the month field is not an integer, or is outside
            1..12. Out-of-range months are rejected rather than silently
            being labelled 'Q4'.
        IndexError: If month_str contains no '-' separator.
    """
    month = int(month_str.split('-')[1])
    if not 1 <= month <= 12:
        raise ValueError(
            f"month must be between 01 and 12, got {month_str!r}"
        )
    # Months 1-3 -> Q1, 4-6 -> Q2, 7-9 -> Q3, 10-12 -> Q4.
    return f"Q{(month - 1) // 3 + 1}"

def _pct_change(current, previous):
    """Return the percent change from previous to current (0 if previous <= 0)."""
    if previous > 0:
        return (current - previous) / previous * 100
    return 0


def generate_forecasts(scenario='realistic'):
    """Generate monthly forecasts for 2026 under one growth scenario.

    For each month from 2026-01 to 2026-12 the model:
      1. grows the previous month's sessions by a month-over-month rate that
         decays slightly each month (floored at 5%),
      2. adds 60% of that month's modeled initiative traffic,
      3. multiplies by the quarter's seasonality factor,
      4. derives leads, MQLs and customers from the sessions via the
         month's conversion rate and stepped funnel rates.

    Args:
        scenario: 'conservative' (8% starting MoM session growth) or
            'realistic' (12%). Any other value is treated as the aggressive
            scenario (15%).

    Returns:
        dict mapping each 'YYYY-MM' month to a dict with keys 'sessions',
        'leads', 'mqls', 'customers', 'sessions_growth', 'leads_growth',
        'mqls_growth', 'conversion_rate', 'lead_to_mql_rate',
        'mql_to_customer_rate', 'baseline_sessions', 'initiative_traffic',
        'seasonality_factor' and 'quarter'. Rates and growth figures are
        percentages rounded to 2 decimals; counts are rounded to integers
        ('initiative_traffic' is stored unrounded).

    Raises:
        KeyError: If the Q4 data lacks 'baseline_growth_rates', or the
            impact data lacks a month or its 'total_traffic' /
            'conversion_rate' entries.
    """
    # Load data
    q4_data = load_q4_trajectory()
    impacts = load_initiative_impacts()
    seasonality = get_seasonality_factors()

    # NOTE(review): the Q4 baseline rates are looked up (so the key must
    # exist) but are not read anywhere below; the scenario rates are
    # hard-coded. Confirm whether they were meant to drive sessions_mom.
    baseline = q4_data['baseline_growth_rates']

    # Starting month-over-month session growth (percent) per scenario.
    # Lead and MQL volumes are not grown separately: they follow from
    # sessions through the conversion rates below.
    if scenario == 'conservative':
        sessions_mom = 8.0   # moderate growth
    elif scenario == 'realistic':
        sessions_mom = 12.0  # sustaining Q4 momentum
    else:  # aggressive (also the fallback for unrecognized names)
        sessions_mom = 15.0  # accelerating

    # Starting point (November 2025 actuals per the original author). The
    # first month's growth figures are measured against these values.
    prev_sessions = 17915
    prev_leads = 155
    prev_mqls = 63

    forecasts = {}
    months = [
        '2026-01', '2026-02', '2026-03', '2026-04', '2026-05', '2026-06',
        '2026-07', '2026-08', '2026-09', '2026-10', '2026-11', '2026-12'
    ]

    for month_index, month in enumerate(months):
        quarter = get_quarter(month)
        seasonality_factor = seasonality[quarter]

        # Diminishing returns: the growth rate loses 1.5% of its starting
        # value for every month elapsed, but never drops below 5% MoM.
        effective_growth = sessions_mom * (1 - 0.015 * month_index)
        effective_growth = max(effective_growth, 5.0)

        baseline_sessions = prev_sessions * (1 + effective_growth / 100)

        # Initiatives take time to mature, so only 60% of the modeled
        # incremental traffic is counted.
        initiative_traffic = impacts[month]['total_traffic'] * 0.6
        total_sessions = baseline_sessions + initiative_traffic

        # NOTE(review): the seasonally adjusted total is carried forward as
        # next month's base, so the quarterly factor is applied on top of
        # itself month after month. Confirm this compounding is intended.
        total_sessions = total_sessions * seasonality_factor

        # Leads come straight from sessions at the month's modeled
        # conversion rate; no extra lead-growth compounding is applied.
        conversion_rate = impacts[month]['conversion_rate']
        total_leads = total_sessions * conversion_rate

        # Funnel rates step up from June and again from September. The
        # zero-padded 'YYYY-MM' strings compare correctly as text.
        # Baselines quoted by the original author: Lead -> MQL 40.65%,
        # MQL -> Customer 4.76%.
        if month >= '2026-09':
            lead_to_mql_rate, mql_to_customer_rate = 0.55, 0.15
        elif month >= '2026-06':
            lead_to_mql_rate, mql_to_customer_rate = 0.50, 0.12
        else:
            lead_to_mql_rate, mql_to_customer_rate = 0.45, 0.10

        total_mqls = total_leads * lead_to_mql_rate
        total_customers = total_mqls * mql_to_customer_rate

        forecasts[month] = {
            'sessions': round(total_sessions),
            'leads': round(total_leads),
            'mqls': round(total_mqls),
            'customers': round(total_customers),
            'sessions_growth': round(_pct_change(total_sessions, prev_sessions), 2),
            'leads_growth': round(_pct_change(total_leads, prev_leads), 2),
            'mqls_growth': round(_pct_change(total_mqls, prev_mqls), 2),
            'conversion_rate': round(conversion_rate * 100, 2),
            'lead_to_mql_rate': round(lead_to_mql_rate * 100, 2),
            'mql_to_customer_rate': round(mql_to_customer_rate * 100, 2),
            'baseline_sessions': round(baseline_sessions),
            'initiative_traffic': initiative_traffic,
            'seasonality_factor': seasonality_factor,
            'quarter': quarter
        }

        # Unrounded values feed the next month so rounding error does not
        # accumulate across the year.
        prev_sessions = total_sessions
        prev_leads = total_leads
        prev_mqls = total_mqls

    return forecasts

def generate_all_scenarios():
    """Build the forecast for every named scenario.

    Returns a dict keyed by scenario name, in the order 'conservative',
    'realistic', 'aggressive', whose values are the per-scenario results
    of generate_forecasts().
    """
    scenario_names = ('conservative', 'realistic', 'aggressive')
    return {name: generate_forecasts(name) for name in scenario_names}

if __name__ == "__main__":
    # Compute every scenario once, persist them all, then summarize the
    # realistic one on stdout.
    all_scenarios = generate_all_scenarios()

    analysis_dir = Path(__file__).parent.parent / "06-DATA-ANALYSIS"
    output_file = analysis_dir / "monthly_forecasts_2026.json"
    with output_file.open('w', encoding='utf-8') as out:
        json.dump(all_scenarios, out, indent=2, ensure_ascii=False)

    realistic = all_scenarios['realistic']

    # Month-by-month table.
    print("2026 Monthly Forecasts (Realistic Scenario):\n")
    header_cells = (
        f"{'Month':<10}",
        f"{'Sessions':<12}",
        f"{'Leads':<8}",
        f"{'MQLs':<8}",
        f"{'Customers':<10}",
        f"{'Sessions Growth':<15}",
    )
    print(" ".join(header_cells))
    print("-" * 75)
    for month_key, row in realistic.items():
        row_cells = (
            f"{month_key:<10}",
            f"{row['sessions']:<12,}",
            f"{row['leads']:<8,}",
            f"{row['mqls']:<8,}",
            f"{row['customers']:<10,}",
            f"{row['sessions_growth']:>6.2f}%",
        )
        print(" ".join(row_cells))

    # Year-end figures.
    print("\n\nDecember 2026 Targets (Realistic):")
    dec = realistic['2026-12']
    for label, key in (
        ("Sessions", 'sessions'),
        ("Leads", 'leads'),
        ("MQLs", 'mqls'),
        ("Customers", 'customers'),
    ):
        print(f"  {label}: {dec[key]:,}")

    print(f"\nOutput saved to: {output_file}")

