#!/usr/bin/env python3
"""
Simple HubSpot data extractor - manually maps known column positions.
"""

import csv
import json
from pathlib import Path

def extract_organic_search_data(file_path):
    """Extract organic search data with manual column mapping."""
    
    # Known column mapping: data column -> month
    # Based on CSV structure: data starts at col 8, months are offset
    # Row 113 shows: ...,40,81,155,6 where 155 is Nov 2025
    # So: col 40 = Oct 25, col 41 = Nov 25, col 42 = Dec 25
    month_columns = {
        29: '2025-01',  # Jan. 25
        30: '2025-02',  # Feb. 25
        31: '2025-03',  # März 25
        32: '2025-04',  # Apr. 25
        33: '2025-05',  # Mai 25
        34: '2025-06',  # Juni 25
        35: '2025-07',  # Juli 25
        36: '2025-08',  # Aug. 25
        37: '2025-09',  # Sept. 25
        40: '2025-10',  # Okt. 25 (value 40 in CSV)
        41: '2025-11',  # Nov. 25 (value 81 in CSV, but user shows 155 - need to verify)
        42: '2025-12',  # Dez. 25 (value 155 in CSV, but might be Nov)
        18: '2024-01',  # Jan. 24
        19: '2024-02',  # Feb. 24
        20: '2024-03',  # März 24
        21: '2024-04',  # Apr. 24
        22: '2024-05',  # Mai 24
        23: '2024-06',  # Juni 24
        24: '2024-07',  # Juli 24
        25: '2024-08',  # Aug. 24
        26: '2024-09',  # Sept. 24
        27: '2024-10',  # Okt. 24
        28: '2024-11',  # Nov. 24
        29: '2024-12',  # Dez. 24
    }
    
    organic_data = {
        'leads': {},
        'mqls': {},
        'customers': {},
        'demo_booked_pct': {},
        'unqualified_pct': {}
    }
    
    with open(file_path, 'r', encoding='utf-8') as f:
        reader = csv.reader(f)
        rows = list(reader)
        
        # Find organic search rows
        for i, row in enumerate(rows):
            if len(row) < 8:
                continue
            
            channel = row[1].strip() if len(row) > 1 and row[1] else ''
            if channel != 'organic search':
                continue
            
            metric = row[5].strip() if len(row) > 5 and row[5] else ''
            metric_type = row[6].strip() if len(row) > 6 and row[6] else ''
            
            # Extract data based on metric type
            if 'Inbound Leads' in metric and 'actuals' in metric_type:
                for col_idx, month in month_columns.items():
                    if col_idx < len(row) and row[col_idx] and row[col_idx].strip():
                        try:
                            value = int(row[col_idx].strip())
                            organic_data['leads'][month] = value
                        except (ValueError, AttributeError):
                            pass
            
            elif 'Inbound MQL' in metric and 'actuals' in metric_type:
                for col_idx, month in month_columns.items():
                    if col_idx < len(row) and row[col_idx] and row[col_idx].strip():
                        try:
                            value = int(row[col_idx].strip())
                            organic_data['mqls'][month] = value
                        except (ValueError, AttributeError):
                            pass
            
            elif 'Customers' in metric and 'actuals' in metric_type and 'CVR' not in metric:
                for col_idx, month in month_columns.items():
                    if col_idx < len(row) and row[col_idx] and row[col_idx].strip():
                        try:
                            value = int(row[col_idx].strip())
                            organic_data['customers'][month] = value
                        except (ValueError, AttributeError):
                            pass
            
            elif 'Demo booked' in metric:
                for col_idx, month in month_columns.items():
                    if col_idx < len(row) and row[col_idx] and row[col_idx].strip():
                        try:
                            val_str = row[col_idx].strip().rstrip('%')
                            value = float(val_str) if val_str else 0
                            organic_data['demo_booked_pct'][month] = value
                        except (ValueError, AttributeError):
                            pass
            
            elif 'Unqualified' in metric:
                for col_idx, month in month_columns.items():
                    if col_idx < len(row) and row[col_idx] and row[col_idx].strip():
                        try:
                            val_str = row[col_idx].strip().rstrip('%')
                            value = float(val_str) if val_str else 0
                            organic_data['unqualified_pct'][month] = value
                        except (ValueError, AttributeError):
                            pass
    
    return organic_data

if __name__ == "__main__":
    file_path = Path("/Users/hadyelhady/Desktop/Ordio/Strategy/Screenshots/Marketing _ KPI Report - Channels (m).csv")
    data = extract_organic_search_data(file_path)
    print(json.dumps(data, indent=2, ensure_ascii=False))

